framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,float16,0,5.639904022216797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,34.055188496907554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,fp8,0,5.680373509724935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,64,128,1,fp8,fp8,0,5.128853480021159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,64,128,1,fp8,fp8,0,5.205653190612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,float16,0,5.708335876464844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,fp8,0,5.75760014851888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,fp8,0,5.792133331298828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,float16,0,5.7388051350911455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,34.10701243082682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,64,128,1,fp8,fp8,0,5.2498931884765625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,float16,0,3.298458735148112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,31.47369130452474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,31.525550842285156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,34.24447377522787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,64,128,1,fp8,fp8,0,3.1228319803873696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,34.23193613688151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,fp8,0,3.3674453099568686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,34.23953501383463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,float16,0,2.9313014348347983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,17.777812957763672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,64,128,1,fp8,fp8,0,2.6703147888183594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,fp8,0,2.9539572397867837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,31.56470489501953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,16.35196812947591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,float16,0,2.9405813217163086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,34.30976104736328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,64,128,1,fp8,fp8,0,2.6849921544392905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,17.843738555908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,17.22707748413086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,fp8,0,2.968533198038737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,float16,0,2.9586559931437173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,fp8,0,2.985583941141764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,15.899531046549479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,17.253140767415363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,64,128,1,fp8,fp8,0,2.7081066767374673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,float16,0,1.7897653579711914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,17.23145039876302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,fp8,0,1.8345707257588704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,64,128,1,fp8,fp8,0,1.713498592376709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,17.25766372680664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,15.88968022664388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,float16,0,1.622005303700765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,9.083925247192383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,fp8,0,1.6324639320373535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,17.30858612060547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,64,128,1,fp8,fp8,0,1.4945866266886394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,8.384080251057943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,9.130202611287435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,float16,0,1.6274933815002441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,15.930789947509766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,fp8,0,1.6404320398966472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,17.309290568033855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,64,128,1,fp8,fp8,0,1.500874678293864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,8.810111999511719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,fp8,0,1.646085262298584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,8.165903727213541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,8.82265599568685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,float16,0,1.6342345873514812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,64,128,1,fp8,fp8,0,1.510101318359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,float16,0,1.1735466321309407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,8.83144505818685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,fp8,0,1.1744853655497234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,8.853567759195963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,64,128,1,fp8,fp8,0,1.1001866658528645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,8.178229649861654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,8.87122662862142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,float16,0,1.1872639656066895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,4.868927955627441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,fp8,0,1.1775519847869873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,64,128,1,fp8,fp8,0,1.1052532990773518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,8.171685536702475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,8.874762852986654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,4.485610644022624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,4.876938819885254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,float16,0,1.1871786912282307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,4.847061475118001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,fp8,0,1.172922690709432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,64,128,1,fp8,fp8,0,1.0913066864013672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,float16,0,1.174730698267619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,4.8285706837972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,4.494085311889648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,fp8,0,1.18722136815389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,64,128,1,fp8,fp8,0,1.0929173628489177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,4.841658592224121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,4.498890558878581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,4.840847969055176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,4.857199986775716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,4.843621253967285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,4.4966080983479815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,float16,0,4.200469334920247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,fp8,0,4.228416124979655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,64,128,1,fp8,fp8,0,3.79859193166097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,float16,0,4.215813318888347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,fp8,0,4.25435733795166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,64,128,1,fp8,fp8,0,3.842042605082194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,float16,0,4.2439680099487305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,fp8,0,4.280682563781738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,20.076165517171223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,18.51193618774414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,20.098021189371746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,64,128,1,fp8,fp8,0,3.8752638498942056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,20.142655690511067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,20.129493713378906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,float16,0,2.465439955393473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,18.525243123372395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,64,128,1,fp8,fp8,0,2.332597255706787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,fp8,0,2.52127997080485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,20.16100311279297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,float16,0,2.198042710622152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,fp8,0,2.2182772954305015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,64,128,1,fp8,fp8,0,2.004298686981201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,10.55778694152832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,10.595738728841146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,float16,0,2.2092639605204263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,9.742005030314127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,10.164800008138021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,18.571236928304035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,fp8,0,2.2275519371032715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,20.226378122965496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,64,128,1,fp8,fp8,0,2.0166080792744956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,float16,0,2.220458666483561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,10.197077433268229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,9.394800186157227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,fp8,0,2.2390987078348794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,float16,0,1.3449759483337402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,64,128,1,fp8,fp8,0,2.037994702657064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,fp8,0,1.3791680335998535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,10.18834114074707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,64,128,1,fp8,fp8,0,1.2905759811401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,9.40781339009603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,10.208794911702475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,5.432645161946614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,10.24343490600586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,float16,0,1.2204373677571614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,fp8,0,1.2287253538767497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,9.421274820963541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,64,128,1,fp8,fp8,0,1.1254933675130208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,10.254037221272787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,5.46010144551595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,5.040560086568196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,float16,0,1.2261706988016765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,fp8,0,1.2336479822794597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,64,128,1,fp8,fp8,0,1.1306453545888264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,5.267653465270996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,float16,0,1.2298933664957683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,5.2525332768758135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,fp8,0,1.240447998046875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,4.863626797993978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,64,128,1,fp8,fp8,0,1.138159990310669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,5.2694346110026045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,float16,0,0.8925066788991293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,4.866565386454265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,5.280981381734212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,fp8,0,0.8867466449737549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,64,128,1,fp8,fp8,0,0.8298506736755371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,5.273130734761556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,float16,0,0.8877706527709961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,2.9739252726236978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,fp8,0,0.884389321009318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,64,128,1,fp8,fp8,0,0.824186642964681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,5.28766409556071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,4.872661272684733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,2.742506663004557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,2.968912124633789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,2.950746536254883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,float16,0,0.8882293701171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,fp8,0,0.8873546918233236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,64,128,1,fp8,fp8,0,0.825984001159668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,2.9497706095377603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,float16,0,0.8872586886088053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,2.7315041224161782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,fp8,0,0.8917973041534424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,2.9518454869588218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,64,128,1,fp8,fp8,0,0.8239093621571859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,2.9498561223347983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,2.7433493932088218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,2.955567995707194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,2.7313973108927407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,2.9628372192382812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,float16,0,3.4912001291910806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,64,128,1,fp8,fp8,0,3.15827210744222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,fp8,0,3.5156160990397134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,float16,0,3.4998559951782227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,fp8,0,3.5331945419311523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,64,128,1,fp8,fp8,0,3.1845601399739585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,float16,0,3.5222558975219727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,14.41650644938151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,13.291722615559896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,14.456991831461588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,fp8,0,3.554101308186849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,14.481280008951822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,float16,0,2.051637331644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,64,128,1,fp8,fp8,0,3.20908260345459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,14.502330780029297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,13.309280395507812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,fp8,0,2.100341320037842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,64,128,1,fp8,fp8,0,1.9461867014567058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,14.505743662516275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,float16,0,1.8334986368815105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,fp8,0,1.846933364868164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,7.65284792582194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,64,128,1,fp8,fp8,0,1.6725172996520996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,7.686384201049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,7.051370620727539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,7.329856236775716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,13.35491689046224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,float16,0,1.8401707013448079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,14.530773162841797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,fp8,0,1.8573439915974934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,64,128,1,fp8,fp8,0,1.6849279403686523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,float16,0,1.8490667343139648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,7.362122853597005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,6.775541305541992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,fp8,0,1.8644533157348633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,7.350485483805339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,64,128,1,fp8,fp8,0,1.6943039894104004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,float16,0,1.121519962946574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,6.787909189860026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,fp8,0,1.148645321528117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,7.379685084025065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,64,128,1,fp8,fp8,0,1.0776320298512776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,float16,0,1.018997351328532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,7.384991963704427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,3.9476426442464194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,fp8,0,1.024880011876424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,64,128,1,fp8,fp8,0,0.9389119942982992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,6.798709233601888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,3.6700000762939453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,3.9780960083007812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,7.411626815795898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,3.8156105677286782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,float16,0,1.0221386750539143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,fp8,0,1.0300959746042888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,64,128,1,fp8,fp8,0,0.9454186757405599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,float16,0,1.0265706380208333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,3.8143412272135415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,3.5282185872395835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,fp8,0,1.0340853532155354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,64,128,1,fp8,fp8,0,0.9517013231913248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,3.816101392110189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,float16,0,0.7371679941813151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,3.538640022277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,3.834239959716797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,fp8,0,0.7359946568806967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,64,128,1,fp8,fp8,0,0.6892426808675131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,3.8253065745035806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,2.1949493090311685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,float16,0,0.7463413079579672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,3.8348639806111655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,3.541285196940104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,fp8,0,0.740453322728475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,2.1981226603190103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,2.026650587717692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,64,128,1,fp8,fp8,0,0.6890239715576172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,2.173914591471354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,float16,0,0.7385973135630289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,fp8,0,0.7398186524709066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,64,128,1,fp8,fp8,0,0.6898667017618815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,2.173914591471354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,2.0181387265523276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,float16,0,0.7381866772969564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,2.1745227177937827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,fp8,0,0.7387306690216064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,64,128,1,fp8,fp8,0,0.6864319642384847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,2.1826400756835938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,2.0202186902364097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,2.1854613622029624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,2.177461306254069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,2.021695931752523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,float16,0,5.474831899007161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,64,128,1,fp8,fp8,0,4.971071879069011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,fp8,0,5.517007827758789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,float16,0,5.548650741577148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,64,128,1,fp8,fp8,0,5.058298746744792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,fp8,0,5.58958371480306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,float16,0,5.5906931559244795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,17.760021209716797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,19.273338317871094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,19.28658676147461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,19.381088256835938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,fp8,0,5.630890528361003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,64,128,1,fp8,fp8,0,5.096927960713704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,float16,0,3.147029240926107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,17.80683771769206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,19.400122324625652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,fp8,0,3.215008099873861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,64,128,1,fp8,fp8,0,2.9738613764444985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,19.441083272298176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,float16,0,2.7852961222330728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,10.181418736775717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,64,128,1,fp8,fp8,0,2.525733311971029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,fp8,0,2.8078667322794595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,9.399199803670248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,10.23475710550944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,float16,0,2.795818646748861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,17.858367919921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,9.706581115722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,19.50424575805664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,fp8,0,2.8191359837849936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,64,128,1,fp8,fp8,0,2.5435360272725425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,float16,0,2.8128906885782876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,8.9541384379069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,9.729530970255533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,9.736607869466146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,fp8,0,2.839066823323568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,64,128,1,fp8,fp8,0,2.5637173652648926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,float16,0,1.6417867342631023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,8.967039744059244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,9.754181543986002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,64,128,1,fp8,fp8,0,1.5605279604593914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,fp8,0,1.6805599530537922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,9.770405451456705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,float16,0,1.4687946637471516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,5.193711916605632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,fp8,0,1.479765256245931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,4.8018239339192705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,5.2283627192179365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,64,128,1,fp8,fp8,0,1.3437439600626628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,9.802373250325521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,8.974608103434244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,float16,0,1.4731574058532715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,4.95364252726237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,fp8,0,1.4876532554626465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,64,128,1,fp8,fp8,0,1.3497066497802734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,float16,0,1.4810293515523274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,4.978138605753581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,4.575855890909831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,fp8,0,1.495039939880371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,4.978639920552571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,64,128,1,fp8,fp8,0,1.3589706420898438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,float16,0,0.90066130956014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,4.986234664916992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,4.585050582885742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,fp8,0,0.9226933320363363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,64,128,1,fp8,fp8,0,0.866213321685791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,4.9996747970581055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,float16,0,0.818021297454834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,2.704304059346517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,fp8,0,0.8237226804097494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,4.595802625020345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,5.014906565348308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,2.5136426289876304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,64,128,1,fp8,fp8,0,0.7552533149719238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,2.7241973876953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,2.595109303792318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,float16,0,0.8223040103912354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,fp8,0,0.8284053007761637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,64,128,1,fp8,fp8,0,0.7590239842732748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,2.4046452840169272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,2.600074609120687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,float16,0,0.8245440324147543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,fp8,0,0.8306879997253418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,2.6049013137817383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,64,128,1,fp8,fp8,0,0.7650187015533447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,2.60643736521403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,2.4054346084594727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,float16,0,0.5921813249588013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,fp8,0,0.5943946838378906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,2.608954588572184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,64,128,1,fp8,fp8,0,0.5525973240534464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,1.5296212832132976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,2.4080959955851235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,2.617824077606201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,float16,0,0.5946933428446451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,1.5269653002421062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,fp8,0,0.6022453308105469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,1.4106879234313965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,64,128,1,fp8,fp8,0,0.5552906592686971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,1.5119253794352214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,float16,0,0.5954986810684204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,fp8,0,0.602399984995524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,64,128,1,fp8,fp8,0,0.5534293254216512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,1.51365327835083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,1.4075039227803547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,1.51910400390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,float16,0,0.5948799848556519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,fp8,0,0.5990933179855347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,64,128,1,fp8,fp8,0,0.5549973249435425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,1.520960013071696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,1.407258669535319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,1.5195627212524414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,1.5215466817220051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,1.4123573303222656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,float16,0,4.074117342631022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,64,128,1,fp8,fp8,0,3.6826934814453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,fp8,0,4.10316785176595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,float16,0,4.094448089599609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,64,128,1,fp8,fp8,0,3.7295945485432944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,fp8,0,4.130256017049153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,10.69484837849935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,11.63478978474935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,11.667770385742188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,float16,0,4.120314598083496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,11.69717280069987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,64,128,1,fp8,fp8,0,3.754096031188965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,fp8,0,4.151023864746094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,float16,0,2.3566506703694663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,11.718864440917969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,10.757476806640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,64,128,1,fp8,fp8,0,2.2212586402893066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,fp8,0,2.4042347272237143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,11.756858825683594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,float16,0,2.0852905909220376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,6.222026824951172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,6.277498881022136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,10.799728393554688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,fp8,0,2.1031947135925293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,11.770511627197266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,64,128,1,fp8,fp8,0,1.8967413902282715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,5.767434438069661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,float16,0,2.095167954762777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,5.902965545654297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,64,128,1,fp8,fp8,0,1.9067947069803874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,fp8,0,2.113861401875814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,5.9076690673828125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,float16,0,2.107909361521403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,5.4381974538167315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,fp8,0,2.1268213589986167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,5.911658604939778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,5.939781188964844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,float16,0,1.2334187030792236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,64,128,1,fp8,fp8,0,1.9212212562561035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,5.439290364583333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,fp8,0,1.261786699295044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,64,128,1,fp8,fp8,0,1.1738293170928955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,5.935338973999023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,3.18719482421875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,float16,0,1.1049439907073975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,fp8,0,1.1126826604207356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,3.2202879587809243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,5.4655412038167315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,64,128,1,fp8,fp8,0,1.009925365447998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,5.971093495686849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,2.9633121490478516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,3.031968116760254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,float16,0,1.1088693141937256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,fp8,0,1.1176799933115642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,64,128,1,fp8,fp8,0,1.0161706606547039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,3.0412588119506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,2.7952747344970703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,float16,0,1.1140000025431316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,fp8,0,1.125264008839925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,3.0473012924194336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,64,128,1,fp8,fp8,0,1.0226613680521648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,3.0511039098103843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,2.805253346761068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,float16,0,0.6804052988688151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,fp8,0,0.6964106559753418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,3.0520960489908853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,64,128,1,fp8,fp8,0,0.652949333190918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,1.6832480430603027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,2.8106133143107095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,float16,0,0.6188426812489828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,3.060394605000814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,1.6974132855733235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,1.5732587178548176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,fp8,0,0.6233493487040201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,64,128,1,fp8,fp8,0,0.5714453458786011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,1.60152006149292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,float16,0,0.6189546585083008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,fp8,0,0.6257760127385458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,64,128,1,fp8,fp8,0,0.5740799903869629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,1.605493386586507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,1.4859253565470378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,float16,0,0.6234079996744791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,1.6088746388753254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,fp8,0,0.6278560161590576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,64,128,1,fp8,fp8,0,0.5773280064264933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,1.614261309305827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,1.4880960782368977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,float16,0,0.44924267133076984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,1.6179307301839192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,fp8,0,0.4503946701685588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,64,128,1,fp8,fp8,0,0.42178666591644287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,1.6203734079996746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,0.9801546732584635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,1.4901439348856609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,float16,0,0.4509386618932088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,0.9713760217030843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,0.9027199745178223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,fp8,0,0.4511626561482747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,64,128,1,fp8,fp8,0,0.4185173511505127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,0.9666079680124918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,float16,0,0.45138665040334064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,0.9726186593373617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,0.8973173300425211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,fp8,0,0.4546826680501302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,64,128,1,fp8,fp8,0,0.42027199268341064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,0.9668266773223877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,float16,0,0.4509706497192383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,0.9731733004252116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,0.9774933656056722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,0.8993972937266032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,fp8,0,0.4508853356043498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,64,128,1,fp8,fp8,0,0.42535467942555744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,0.9731733004252116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,0.8983360131581625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,float16,0,5.380677541097005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,64,128,1,fp8,fp8,0,4.877306620279948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,fp8,0,5.4129282633463545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,float16,0,5.454154968261719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,64,128,1,fp8,fp8,0,4.964970588684082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,fp8,0,5.490730921427409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,10.786570231119791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,11.737968444824219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,11.756746927897135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,11.817525227864584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,float16,0,5.486464182535808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,fp8,0,5.52348264058431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,64,128,1,fp8,fp8,0,4.998053232828776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,11.882320404052734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,float16,0,3.0646133422851562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,fp8,0,3.1168692906697593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,10.867781321207682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,64,128,1,fp8,fp8,0,2.893301328023275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,11.886698404947916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,6.322373072306315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,11.911493937174479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,10.904922485351562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,float16,0,2.6945279439290366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,fp8,0,2.7175680796305337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,64,128,1,fp8,fp8,0,2.442527929941813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,6.380197525024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,5.862831751505534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,5.901391983032227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,float16,0,2.7103732426961265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,fp8,0,2.7320852279663086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,64,128,1,fp8,fp8,0,2.4622987111409507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,5.408271789550781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,5.913018544514974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,float16,0,2.7232958475748696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,5.915669123331706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,fp8,0,2.751749356587728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,64,128,1,fp8,fp8,0,2.4774187405904136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,5.933280309041341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,5.438714981079102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,float16,0,1.5673866271972656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,fp8,0,1.6021599769592285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,64,128,1,fp8,fp8,0,1.4833760261535645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,5.945269266764323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,3.212474822998047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,5.447845458984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,5.962618509928386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,float16,0,1.3921173413594563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,3.248554547627767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,2.9906508127848306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,fp8,0,1.4038079579671223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,64,128,1,fp8,fp8,0,1.2686560153961182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,3.000885327657064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,float16,0,1.3979360262552898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,64,128,1,fp8,fp8,0,1.276853322982788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,fp8,0,1.408906618754069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,2.7640053431193032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,3.0146986643473306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,float16,0,1.4044480323791504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,3.0213546752929688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,fp8,0,1.4187359809875488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,3.0328054428100586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,64,128,1,fp8,fp8,0,1.2838666439056396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,2.7713759740193686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,float16,0,0.8265759944915771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,3.029343922932943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,fp8,0,0.8461600144704183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,64,128,1,fp8,fp8,0,0.7878080209096273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,1.6607252756754558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,3.041269302368164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,float16,0,0.7401119867960612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,2.7829386393229165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,1.6793120702107747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,fp8,0,0.7460906505584717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,1.5551679929097493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,64,128,1,fp8,fp8,0,0.6785973707834879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,1.5625972747802734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,float16,0,0.7433226903279623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,fp8,0,0.7502346833546957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,64,128,1,fp8,fp8,0,0.6846186319986979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,1.441882610321045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,1.5669973691304524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,1.565322717030843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,float16,0,0.7477280298868815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,1.573088010152181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,1.448512077331543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,fp8,0,0.7541546821594238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,64,128,1,fp8,fp8,0,0.6867413520812988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,float16,0,0.45720001061757404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,1.5719520250956218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,fp8,0,0.46859200795491535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,0.8894773324330648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,64,128,1,fp8,fp8,0,0.4411199887593587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,1.5805493990580242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,1.4471680323282878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,0.9030826886494955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,0.8361759980519613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,0.8406240145365397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,float16,0,0.4137760003407796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,fp8,0,0.41682132085164386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,float16,0,0.41646401087443036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,64,128,1,fp8,fp8,0,0.38581868012746173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,0.8429706891377767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,0.7787253061930338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,fp8,0,0.4199039936065674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,64,128,1,fp8,fp8,0,0.3874880075454712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,0.8473227024078369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,float16,0,0.4189066489537557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,0.8426667054494222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,fp8,0,0.42241064707438153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,0.7845280170440674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,0.8464852968851725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,64,128,1,fp8,fp8,0,0.3909226655960083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,float16,0,0.30904533465703327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,0.8515360355377197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,0.7861599922180176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,fp8,0,0.31037332614262897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,0.5381493171056112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,64,128,1,fp8,fp8,0,0.2871626615524292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,0.5372960170110067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,float16,0,0.3049760063489278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,0.4990453322728475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,0.5338559945424398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,fp8,0,0.3083626627922058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,64,128,1,fp8,fp8,0,0.28550400336583454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,0.53275199731191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,0.4946560064951579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,float16,0,0.3039253354072571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,0.5349973440170288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,fp8,0,0.30685333410898846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,64,128,1,fp8,fp8,0,0.2874133388201396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,0.5343679984410604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,0.5369013150533041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,64,128,1,fp8,fp8,0,0.28851733605066937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,0.49619734287261963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,float16,0,0.30530667304992676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,0.49806400140126544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,fp8,0,0.30855466922124225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,0.5383253494898478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,float16,0,3.987482706705729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,64,128,1,fp8,fp8,0,3.6016213099161782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,fp8,0,4.015775998433431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,float16,0,4.017354647318522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,6.744223912556966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,7.371381123860677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,7.400037129720052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,64,128,1,fp8,fp8,0,3.649354616800944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,fp8,0,4.048714637756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,7.399354934692383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,float16,0,4.038485209147136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,6.798816045125325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,7.426122665405273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,64,128,1,fp8,fp8,0,3.6795358657836914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,fp8,0,4.0734561284383135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,float16,0,2.2891626358032227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,fp8,0,2.3328426678975425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,64,128,1,fp8,fp8,0,2.158181349436442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,7.4426829020182295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,7.472890853881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,4.028527895609538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,6.8253173828125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,4.074858665466309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,float16,0,2.01802666982015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,fp8,0,2.0396052996317544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,64,128,1,fp8,fp8,0,1.8320372899373372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,3.7476746241251626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,3.7140105565389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,float16,0,2.0331360499064126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,fp8,0,2.051520029703776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,3.4131253560384116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,64,128,1,fp8,fp8,0,1.844037373860677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,3.7375574111938477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,3.7307252883911133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,float16,0,2.043717384338379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,3.748501459757487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,64,128,1,fp8,fp8,0,1.8574986457824707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,fp8,0,2.061728000640869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,3.4241065979003906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,float16,0,1.1780959765116374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,3.756079991658529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,fp8,0,1.203541358311971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,64,128,1,fp8,fp8,0,1.1166293621063232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,3.7731199264526367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,2.054170608520508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,3.439023971557617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,float16,0,1.046015977859497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,2.079616069793701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,1.9242933591206868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,fp8,0,1.0560800234476726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,64,128,1,fp8,fp8,0,0.9523200194040934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,1.9053279558817546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,float16,0,1.0515893300374348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,1.9140373865763347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,64,128,1,fp8,fp8,0,0.9604746500651041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,1.7538612683614094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,fp8,0,1.0613866647084553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,1.9125919342041016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,1.924741268157959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,float16,0,1.0577812989552815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,1.7621280352274578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,fp8,0,1.0680853525797527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,64,128,1,fp8,fp8,0,0.9675467014312744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,float16,0,0.6229066848754883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,1.919706662495931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,fp8,0,0.6372640132904053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,64,128,1,fp8,fp8,0,0.5936266581217448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,1.0737653573354085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,1.768074671427409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,1.9341227213541667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,1.0852533181508381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,float16,0,0.5587199926376343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,1.004853328069051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,fp8,0,0.563701351483663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,64,128,1,fp8,fp8,0,0.5127306779225668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,0.9973333676656088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,float16,0,0.5615626573562622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,1.0050026575724285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,0.9218186537424723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,fp8,0,0.566271980603536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,64,128,1,fp8,fp8,0,0.515775998433431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,1.0006666978200276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,float16,0,0.564303994178772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,1.006650686264038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,0.9268480141957601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,fp8,0,0.570464015007019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,64,128,1,fp8,fp8,0,0.5184693336486816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,1.0057226816813152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,float16,0,0.3487679958343506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,fp8,0,0.3569920063018799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,0.5819840033849081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,64,128,1,fp8,fp8,0,0.33475200335184735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,0.5493280092875162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,0.9311947027842203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,1.0106613636016846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,0.5909386475880941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,float16,0,0.3118240038553874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,fp8,0,0.3139520088831584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,0.5425546566645304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,0.5439093510309855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,64,128,1,fp8,fp8,0,0.2927146752675374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,0.5062880118687948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,float16,0,0.31387199958165485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,fp8,0,0.31725333134333294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,0.5476373434066772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,0.5457760095596313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,64,128,1,fp8,fp8,0,0.29578133424123126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,0.5093013445536295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,float16,0,0.3163733283678691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,0.5475253264109293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,fp8,0,0.32020266850789386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,float16,0,0.2343626618385315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,64,128,1,fp8,fp8,0,0.2977280020713806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,0.3611626625061035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,0.5498079856236776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,0.5124320189158121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,fp8,0,0.234442671140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,0.3381706476211548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,64,128,1,fp8,fp8,0,0.2192373275756836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,0.3654986619949341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.3583306471506755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,float16,0,0.22959999243418375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.3561226526896159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,fp8,0,0.23042132457097372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,fp8,0,0.23207465807596842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,64,128,1,fp8,fp8,0,0.21754666169484457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,0.3363413413365682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,float16,0,0.23187732696533203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.36051734288533527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.35979731877644855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,64,128,1,fp8,fp8,0,0.21594667434692383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,64,128,1,fp8,fp8,0,0.21727999051411948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.3570079803466797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,0.3350133498509725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,float16,0,0.23195733626683554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,fp8,0,0.23202667633692423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.36106133460998535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,0.33639466762542725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,float16,0,5.320650736490886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,64,128,1,fp8,fp8,0,4.796319961547852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,fp8,0,5.339071909586589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,float16,0,5.424928029378255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,7.930469512939453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,7.260522842407227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,7.965557098388672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,8.078485488891602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,64,128,1,fp8,fp8,0,4.853253364562988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,fp8,0,5.412437438964844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,float16,0,5.423258463541667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,8.036810557047525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,7.3113759358723955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,fp8,0,5.435461044311523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,64,128,1,fp8,fp8,0,4.887690544128418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,8.057984034220377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,float16,0,3.009786605834961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,fp8,0,3.057648022969564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,64,128,1,fp8,fp8,0,2.8347466786702475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,7.364826838175456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,4.369669278462728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,8.092016220092773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,4.422533353169759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,float16,0,2.642303943634033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,4.091279983520508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,fp8,0,2.6635146141052246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,64,128,1,fp8,fp8,0,2.391669273376465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,3.9610560735066733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,float16,0,2.6590879758199057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,3.983674685160319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,3.6358985900878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,fp8,0,2.68230406443278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,64,128,1,fp8,fp8,0,2.4137439727783203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,3.9810078938802085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,float16,0,2.6758718490600586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,4.004426638285319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,3.6517546971639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,fp8,0,2.7000907262166343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,64,128,1,fp8,fp8,0,2.431013266245524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,4.010592142740886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,float16,0,1.5281279881795247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,4.02625052134196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,fp8,0,1.557674725850423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,64,128,1,fp8,fp8,0,1.442464033762614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,2.21671470006307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,3.668389320373535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,2.2476906776428223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,float16,0,1.3466240564982097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,fp8,0,1.3596906661987305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,64,128,1,fp8,fp8,0,1.2226826349894206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,2.071733315785726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,2.013978640238444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,2.027653376261393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,float16,0,1.356298605600993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,1.8479040463765461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,fp8,0,1.3694720268249512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,64,128,1,fp8,fp8,0,1.2315626939137776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,2.022864023844401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,float16,0,1.3634133338928223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,2.037482738494873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,1.857034683227539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,fp8,0,1.3766560554504395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,64,128,1,fp8,fp8,0,1.241477330525716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,2.036341349283854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,float16,0,0.7902080217997233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,fp8,0,0.8071466286977133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,2.050602595011393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,1.867322603861491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,1.1387413342793782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,64,128,1,fp8,fp8,0,0.7479626337687174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,float16,0,0.701754649480184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,1.1602453390757244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,fp8,0,0.706496000289917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,1.0715680122375488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,64,128,1,fp8,fp8,0,0.6400800148646036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,1.0403520266215007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,float16,0,0.7043093045552572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,1.0477546850840251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,0.9591093063354492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,fp8,0,0.7123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,1.0463573137919109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,64,128,1,fp8,fp8,0,0.6441119909286499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,float16,0,0.7089227040608724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,1.0524799823760986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,0.9634826978047689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,fp8,0,0.7154826323191324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,1.0507786273956299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,64,128,1,fp8,fp8,0,0.6493813196818033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,float16,0,0.4205546776453654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,1.058181365331014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,0.6003359953562418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,0.969866673151652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,0.5695039828618368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,fp8,0,0.42996267477671307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,64,128,1,fp8,fp8,0,0.4018133481343587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,0.6127253373463949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,float16,0,0.37387200196584064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,0.5500799814860026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,0.513375997543335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,fp8,0,0.37726934750874835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,0.5538880030314127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,64,128,1,fp8,fp8,0,0.3476053476333618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,0.5544053316116333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,float16,0,0.37723199526468915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,fp8,0,0.3813279867172241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,64,128,1,fp8,fp8,0,0.34891200065612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,0.5560213327407837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,0.5565706491470337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,float16,0,0.3789973258972168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,0.5608959992726644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,0.5144746700922648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,fp8,0,0.3823733329772949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,64,128,1,fp8,fp8,0,0.35259199142456055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,float16,0,0.23970667521158853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,0.5179359912872314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,0.3179946740468343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,0.33480532964070636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,fp8,0,0.24408533175786337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,64,128,1,fp8,fp8,0,0.22979199886322021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,0.3399999936421712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,float16,0,0.2113866607348124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.3048853278160095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,fp8,0,0.21392534176508585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,64,128,1,fp8,fp8,0,0.20120000839233398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.3062346577644348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,0.28752533594767254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,float16,0,0.21145067612330118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,0.2903253237406413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.30533866087595624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,fp8,0,0.21412799755732217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,64,128,1,fp8,fp8,0,0.20154666900634766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.3090026577313741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,float16,0,0.21353065967559814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.3086666663487752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.30694399277369183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,fp8,0,0.2157920002937317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,64,128,1,fp8,fp8,0,0.2033066749572754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,0.2914239962895711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,float16,0,0.16588266690572104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.21588265895843506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,fp8,0,0.164000004529953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,64,128,1,fp8,fp8,0,0.154341330130895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.21377599239349365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.21557867527008057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.2009119987487793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,float16,0,0.16223999857902527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,fp8,0,0.1611146628856659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,64,128,1,fp8,fp8,0,0.15028267105420431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.214026669661204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,64,128,1,fp8,fp8,0,0.1502400040626526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.19924799601236978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,float16,0,0.16034666697184244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.21149333318074545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,fp8,0,0.16051733493804932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.2124639948209127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.2002346714337667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,float16,0,0.1625226636727651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.21314134200414023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,fp8,0,0.1604320009549459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,64,128,1,fp8,fp8,0,0.15034666657447815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.2118026614189148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.2007946570714315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,float16,0,3.9400854110717773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,64,128,1,fp8,fp8,0,3.5536746978759766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,fp8,0,3.967434565226237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,float16,0,3.974554697672526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,5.215456008911133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,4.772474606831868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,5.244480133056641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,5.260117212931315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,64,128,1,fp8,fp8,0,3.595669428507487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,fp8,0,4.002511978149414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,float16,0,3.994880040486654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,5.291093190511067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,fp8,0,4.026874542236328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,4.806453386942546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,64,128,1,fp8,fp8,0,3.620901425679525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,5.294661204020183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,float16,0,2.25437863667806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,64,128,1,fp8,fp8,0,2.119983990987142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,fp8,0,2.2934239705403647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,2.930591901143392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,4.839130719502767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,5.326842625935872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,2.9668267567952475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,2.742741266886393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,float16,0,1.9751092592875164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,64,128,1,fp8,fp8,0,1.788042704264323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,fp8,0,1.9932799339294434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,2.619589328765869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,float16,0,1.9904853502909343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,2.4056639671325684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,2.641594727834066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,fp8,0,2.006783962249756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,2.6429707209269204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,64,128,1,fp8,fp8,0,1.8026933670043945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,2.6538826624552407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,float16,0,2.0026772816975913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,2.4139787356058755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,fp8,0,2.023530642191569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,64,128,1,fp8,fp8,0,1.81714661916097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,2.6571359634399414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,float16,0,1.1469279925028484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,2.6753759384155273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,2.4304426511128745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,fp8,0,1.1708426475524902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,64,128,1,fp8,fp8,0,1.0831039746602376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,1.4878826141357422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,1.5109440485636394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,1.3978187243143718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,float16,0,1.0112533569335938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,64,128,1,fp8,fp8,0,0.9178613026936849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,fp8,0,1.0205120245615642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,1.3406666119893391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,1.3483519554138184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,1.2272106806437175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,float16,0,1.017093340555827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,fp8,0,1.0270346800486247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,1.3481547037760417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,64,128,1,fp8,fp8,0,0.924186627070109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,float16,0,1.0238773028055828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,1.356730620066325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,1.2370239893595378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,fp8,0,1.0320213635762532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,64,128,1,fp8,fp8,0,0.932101329167684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,1.3540852864583333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,float16,0,0.5945760011672974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,1.3652853965759277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,1.2422880331675212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,0.7716480096181234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,fp8,0,0.6087466478347778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,64,128,1,fp8,fp8,0,0.5641173521677653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,0.7833066781361898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,0.7250186602274576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,float16,0,0.5274879932403564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,0.6935359636942545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,fp8,0,0.5321226517359415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,64,128,1,fp8,fp8,0,0.48308265209198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,0.698965311050415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,0.6418773333231608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,float16,0,0.5311466852823893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,0.6995946566263834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,fp8,0,0.5350293318430582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,64,128,1,fp8,fp8,0,0.485642671585083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,0.7024640242258707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,0.645466685295105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,float16,0,0.5330186684926351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,0.7033440272013346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,fp8,0,0.5384746789932251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,0.41073067982991535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,64,128,1,fp8,fp8,0,0.4899199803670247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,float16,0,0.3195093274116516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,0.7083573341369629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,fp8,0,0.32614399989446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,0.6493173440297445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,64,128,1,fp8,fp8,0,0.30585600932439166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,0.41843732198079425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,0.3893866539001465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,float16,0,0.28148265679677326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.36982933680216473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,fp8,0,0.2834773262341817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,64,128,1,fp8,fp8,0,0.2633226712544759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.3715146780014038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,0.34727466106414795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,float16,0,0.28330133358637494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,0.37282665570576984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,fp8,0,0.2873386740684509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,64,128,1,fp8,fp8,0,0.26705066363016766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.3744693199793498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,64,128,1,fp8,fp8,0,0.2666826645533244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,0.3486826817194621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,0.37787731488545734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,float16,0,0.2860373258590698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,0.37433067957560223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,fp8,0,0.28985599676767987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,float16,0,0.18188265959421793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,0.35035733381907147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.23024000724156699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,fp8,0,0.18509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,64,128,1,fp8,fp8,0,0.17527467012405396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.23621867100397745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.22170666853586832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,float16,0,0.16010666886965433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.20725866158803305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,fp8,0,0.16051200032234192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,64,128,1,fp8,fp8,0,0.14825600385665894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.2076639930407206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.193722665309906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,float16,0,0.15845333536465964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.206986665725708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,fp8,0,0.1623360017935435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,64,128,1,fp8,fp8,0,0.14827199776967367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.20945600668589273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.19509865840276083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,float16,0,0.1597653329372406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.20789867639541626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,fp8,0,0.1628266672293345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,64,128,1,fp8,fp8,0,0.15236799915631613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.21024533112843832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.15570666392644247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.19740800062815347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,float16,0,0.12332266569137573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.15498133500417074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,fp8,0,0.12135466933250427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,64,128,1,fp8,fp8,0,0.11751466989517212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.1459946632385254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,float16,0,0.11988266309102376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.15339733163515726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.15296533703804016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,fp8,0,0.12164266904195149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,fp8,0,0.12169067064921062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,64,128,1,fp8,fp8,0,0.11618666847546895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.15330132842063904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,float16,0,0.12150399883588155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,64,128,1,fp8,fp8,0,0.11317867040634155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.15376533071200052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.1423520048459371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.14147200187047324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,float16,0,0.12139733632405598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.15268799662590027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,fp8,0,0.12121599912643433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,64,128,1,fp8,fp8,0,0.11520000298817952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.15253866712252298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,float16,0,4.779103914896647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,64,128,1,fp8,fp8,0,4.446111996968587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,fp8,0,4.772213300069173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,5.606954574584961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,float16,0,4.849018732706706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,5.249504089355469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,5.599221547444661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,5.667621612548828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,fp8,0,4.800378799438477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,64,128,1,fp8,fp8,0,4.62062931060791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,float16,0,4.891557375590007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,5.621770858764648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,5.406485239664714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,fp8,0,4.8604691823323565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,5.698266983032227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,float16,0,2.6569973627726235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,64,128,1,fp8,fp8,0,4.614570617675781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,fp8,0,2.609109401702881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,3.0711841583251953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,64,128,1,fp8,fp8,0,2.5641226768493652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,5.703072230021159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,3.063551902770996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,5.4143416086832685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,float16,0,2.368154684702555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,fp8,0,2.362570603688558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,2.9642613728841147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,64,128,1,fp8,fp8,0,2.210831960042318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,2.775573412577311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,2.7801173528035483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,2.6110080083211265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,float16,0,2.3791893323262534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,fp8,0,2.3715945879618325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,2.790272076924642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,64,128,1,fp8,fp8,0,2.2584479649861655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,2.7866185506184897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,2.6575466791788735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,float16,0,2.382762591044108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,fp8,0,2.3815627098083496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,2.792970657348633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,float16,0,1.3024693330128987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,64,128,1,fp8,fp8,0,2.3106133143107095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,fp8,0,1.2772160371144612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,1.525477409362793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,2.7914292017618814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,2.718746821085612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,64,128,1,fp8,fp8,0,1.2569759686787922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,1.4940160115559895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,float16,0,1.1915840307871501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,1.46342929204305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,1.4035627047220867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,fp8,0,1.192080020904541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,64,128,1,fp8,fp8,0,1.0875626405080159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,1.3988213539123535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,1.29147736231486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,float16,0,1.1953333218892415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,64,128,1,fp8,fp8,0,1.1128640174865723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,1.4034239451090496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,fp8,0,1.1972320079803467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,1.3094027042388916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,1.4035520553588867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,float16,0,1.1991519927978516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,fp8,0,1.1971680323282878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,1.4104266166687012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,64,128,1,fp8,fp8,0,1.1159626642862956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,float16,0,0.6617706616719564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,0.7736533482869467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,fp8,0,0.647216002146403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,1.4048159917195637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,1.3196907043457031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,64,128,1,fp8,fp8,0,0.6331520080566406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,float16,0,0.6069493293762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,0.7603147029876709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,0.7402133146921793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,0.710863987604777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,fp8,0,0.6079893509546915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,0.7130826314290365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,0.6567360162734985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,64,128,1,fp8,fp8,0,0.554208000500997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,float16,0,0.6093279918034872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,0.7153013547261556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,fp8,0,0.6087626616160074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,64,128,1,fp8,fp8,0,0.5654773314793905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,0.7157386938730875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,0.6666293144226074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,float16,0,0.6096640030543009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,0.7172053654988607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,fp8,0,0.6107573509216309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,64,128,1,fp8,fp8,0,0.5681013266245524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,0.7170453071594238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,float16,0,0.34117865562438965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,0.6686986287434896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,0.40011199315388996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,fp8,0,0.3335786660512288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,64,128,1,fp8,fp8,0,0.32868266105651855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,fp8,0,0.313098669052124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,0.3936053514480591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,0.38200533390045166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,float16,0,0.31282132863998413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,0.36859198411305744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,64,128,1,fp8,fp8,0,0.288266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,0.36768531799316406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,0.3405386606852214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,float16,0,0.3141226569811503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,0.36882134278615314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,float16,0,0.31486932436625165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,fp8,0,0.3134933312733968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,64,128,1,fp8,fp8,0,0.29310933748881024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,0.3678986628850301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,0.3469066619873047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,0.36929599444071454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,fp8,0,0.314794659614563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,64,128,1,fp8,fp8,0,0.29341866572697956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,0.36904001235961914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,0.34834667046864826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,float16,0,0.18211734294891357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.21286400159200033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,fp8,0,0.17895466089248657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,64,128,1,fp8,fp8,0,0.1766186753908793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.20938666661580405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,64,128,1,fp8,fp8,0,0.15467199683189392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,0.2053013245264689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,float16,0,0.1648373305797577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.1941653291384379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,fp8,0,0.16458666324615479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.19434666633605957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.18174399932225546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,float16,0,0.1660319964090983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.19504533211390176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,fp8,0,0.16497066617012024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,64,128,1,fp8,fp8,0,0.15847466389338175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.1940106749534607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.18579200903574625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,float16,0,0.1649066706498464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.194757342338562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,fp8,0,0.16633066534996033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,64,128,1,fp8,fp8,0,0.1588746706644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.19406400124231973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.18541866540908813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,float16,0,0.10100266337394714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,float16,0,0.09076799949010213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.11794132987658183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,fp8,0,0.09133866429328918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,fp8,0,0.09974400202433269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,64,128,1,fp8,fp8,0,0.09949866930643718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.11538666486740112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.11589333415031433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.10532266894976298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,64,128,1,fp8,fp8,0,0.08288533488909404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.10734933614730835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.10665599505106609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.09940266609191895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,float16,0,0.09128533800443013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.10663466652234395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,fp8,0,0.09117866555849712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,64,128,1,fp8,fp8,0,0.08474133412043254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.10085333387056987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.10105066498120625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,float16,0,0.0906773308912913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.10699199636777242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,fp8,0,0.09230400125185649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,64,128,1,fp8,fp8,0,0.05324266850948334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,64,128,1,fp8,fp8,0,0.08519466718037923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.10672000050544739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,fp8,0,0.05463466544946035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.06523733337720235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,fp8,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,64,128,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.06419200201829274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.0603413333495458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,float16,0,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,float16,0,0.05310933291912079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.06423999865849812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,fp8,0,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,64,128,1,fp8,fp8,0,0.05203199883302053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.060831998785336815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.06411199768384297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.06412266691525777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,fp8,0,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,64,128,1,fp8,fp8,0,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,float16,0,4.622458775838216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,64,128,1,fp8,fp8,0,4.316074689229329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,4.622533480326335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,fp8,0,4.595103899637858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,4.395119984944661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,4.626933415730794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,float16,0,4.694208145141602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,4.739658673604329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,64,128,1,fp8,fp8,0,4.445898691813151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,fp8,0,4.684581438700358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,4.496176083882649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,4.709013303120931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,float16,0,4.779936154683431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,4.781349182128906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,fp8,0,4.713621457417806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,64,128,1,fp8,fp8,0,4.457418759663899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,float16,0,2.599599997202555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,2.6444640159606934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,fp8,0,2.549328009287516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,64,128,1,fp8,fp8,0,2.479647954305013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,2.56493870417277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,4.513082822163899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,4.7634932200113935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,float16,0,2.2902026176452637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,2.5236426989237466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,2.310650666554769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,fp8,0,2.2902612686157227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,64,128,1,fp8,fp8,0,2.1391785939534507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,2.173242727915446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,2.3050079345703125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,float16,0,2.294645309448242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,2.3142080307006836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,fp8,0,2.2953227361043296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,64,128,1,fp8,fp8,0,2.2106827100118003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,2.3111359278361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,2.240880012512207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,float16,0,2.303365389506022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,float16,0,1.2683306535085042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,2.322650591532389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,64,128,1,fp8,fp8,0,2.2296907107035318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,fp8,0,2.296725273132324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,1.2808799743652344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,2.2591039339701333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,2.3206987380981445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,fp8,0,1.2377173105875652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,64,128,1,fp8,fp8,0,1.2185440063476562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,1.2555466492970784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,1.24616535504659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,float16,0,1.1538080374399822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,1.1612426439921062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,64,128,1,fp8,fp8,0,1.0584266980489094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,fp8,0,1.153760035832723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,1.1643199920654297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,1.0729386806488037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,float16,0,1.1575146516164143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,1.1651306947072346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,64,128,1,fp8,fp8,0,1.0740319887797039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,fp8,0,1.1554453372955322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,1.1647306283315022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,1.0928853352864583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,float16,0,1.159285306930542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,1.1680320103963215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,fp8,0,1.1588640213012695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,64,128,1,fp8,fp8,0,1.0779146353403728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,float16,0,0.6415040095647176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,1.1674346923828125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,0.6497973203659058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,1.1003306706746419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,fp8,0,0.6267520189285278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,64,128,1,fp8,fp8,0,0.618778665860494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,0.6373706658681234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,0.6274720033009847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,float16,0,0.5875306526819865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,0.5901653369267782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,0.5449493328730265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,fp8,0,0.5862506628036499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,64,128,1,fp8,fp8,0,0.5378133455912272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,0.5916586716969808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,float16,0,0.5868853330612183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,0.5924213329950968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,0.5915786822636923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,0.554149349530538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,fp8,0,0.5894240140914917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,64,128,1,fp8,fp8,0,0.5462453365325928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,float16,0,0.5914080142974854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,0.594650665918986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,fp8,0,0.5890880028406779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,64,128,1,fp8,fp8,0,0.5486506621042887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,0.5944960117340088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,0.5565706491470337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,float16,0,0.3303839961687724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,0.3347359895706177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,fp8,0,0.3234399954477946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,64,128,1,fp8,fp8,0,0.32021333773930866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,fp8,0,0.30188266436258954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,0.3285653392473857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,0.3261173367500305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,float16,0,0.30267200867335003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.3040800094604492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,64,128,1,fp8,fp8,0,0.2784053285916646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.30433066685994464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,64,128,1,fp8,fp8,0,0.2836853265762329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.2831040024757385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,float16,0,0.3023680051167806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.30529600381851196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,fp8,0,0.30220266183217365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.30641067028045654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.28646934032440186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,float16,0,0.30364267031351727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.3089493314425151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,fp8,0,0.3046453396479289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,64,128,1,fp8,fp8,0,0.2850026686986287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,64,128,1,fp8,fp8,0,0.17394665877024332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.3076853354771932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.1769226590792338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.2876159946123759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,float16,0,0.17707200845082602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.17906665802001953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,fp8,0,0.17361599206924438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.17628266414006552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,float16,0,0.15966932972272238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.16220800081888834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,fp8,0,0.15939199924468994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,64,128,1,fp8,fp8,0,0.14919466773668924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.16194666425387064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.1520746648311615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,float16,0,0.15994133551915488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.16063466668128967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,fp8,0,0.16019733746846518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,64,128,1,fp8,fp8,0,0.15122666954994202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.16159466902414957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.15345600247383118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,float16,0,0.1604320009549459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.16267733772595724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,fp8,0,0.16007999579111734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,64,128,1,fp8,fp8,0,0.15262933572133383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,fp8,0,0.09578133622805278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.16320000092188516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.15451733271280924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,float16,0,0.09682133793830872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.09876799583435059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,64,128,1,fp8,fp8,0,0.09670399626096089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.0967733363310496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,64,128,1,fp8,fp8,0,0.08090133468310039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.09877866506576538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,float16,0,0.08859733740488689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,float16,0,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.08900800347328186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,fp8,0,0.0886240005493164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,fp8,0,0.08660266796747844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.08944533268610637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.08225599924723308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.09070932865142822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,fp8,0,0.08889066179593404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,64,128,1,fp8,fp8,0,0.08138133088747661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.08966933687527974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.08447466293970744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,float16,0,0.08859200278917949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.08993066350618999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,64,128,1,fp8,fp8,0,0.0823520024617513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,64,128,1,fp8,fp8,0,0.05312533179918925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.09073066711425781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.0844053328037262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,float16,0,0.05514133473237356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.055205335219701133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,fp8,0,0.05367999772230784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.05514666438102722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.0537013312180837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,float16,0,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.052186667919158936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,fp8,0,0.05229333539803823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,64,128,1,fp8,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,float16,0,0.052101333936055504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,float16,0,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.05203199883302053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,64,128,1,fp8,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.053317333261171974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.05358933409055074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,fp8,0,0.05236800014972687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,64,128,1,fp8,fp8,0,0.05089599887530009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.05241066714127859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,float16,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.036837334434191384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,64,128,1,fp8,fp8,0,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,64,128,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,float16,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,64,128,1,fp8,fp8,0,0.03452266752719879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.03617066641648611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,64,128,1,fp8,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,float16,0,2.123722712198893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,2.08132266998291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,fp8,0,2.119647979736328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,64,128,1,fp8,fp8,0,1.9741759300231934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,2.0768213272094727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,1.9176373481750488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,float16,0,2.11954132715861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,2.0791680018107095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,fp8,0,2.120474656422933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,64,128,1,fp8,fp8,0,2.040602684020996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,2.071674664815267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,1.99835205078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,float16,0,2.136261304219564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,2.0963093439737954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,fp8,0,2.127247969309489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,float16,0,1.1780693531036377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,1.155237356821696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,64,128,1,fp8,fp8,0,2.06549866994222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,2.0829386711120605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,fp8,0,1.14901336034139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,2.017338593800863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,64,128,1,fp8,fp8,0,1.1545546849568684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,1.1283893585205078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,1.1308533350626628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,float16,0,1.0713173548380535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,1.0470720132191975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,fp8,0,1.0671306451161702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,64,128,1,fp8,fp8,0,0.9801599979400635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,1.0451359748840332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,0.9530666669209799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,float16,0,1.0681119759877522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,1.0467840035756428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,fp8,0,1.0690399805704753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,64,128,1,fp8,fp8,0,0.9945546785990397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,1.0446293354034424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,0.9727946917215983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,float16,0,1.0720000267028809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,1.04749329884847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,fp8,0,1.06986665725708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,64,128,1,fp8,fp8,0,0.9986720085144043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,float16,0,0.5970880190531412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,fp8,0,0.582751989364624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,0.5841386715571085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,64,128,1,fp8,fp8,0,0.5813226699829102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,1.0469120343526204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,0.972437302271525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,0.5722346703211466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,0.5695146719614664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,float16,0,0.5445653200149536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,0.5300586620966593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,fp8,0,0.5417759815851847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,64,128,1,fp8,fp8,0,0.49589868386586505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,0.53056534131368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,0.4819680054982503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,float16,0,0.5434666474660238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,0.5315413475036621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,fp8,0,0.5422720114390055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,64,128,1,fp8,fp8,0,0.5035733381907145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,0.5306080182393392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,0.4907519817352295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,float16,0,0.5445226828257242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,0.5318400065104166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,fp8,0,0.5430506865183512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,64,128,1,fp8,fp8,0,0.5060693422953287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,0.5312106609344482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,64,128,1,fp8,fp8,0,0.30058133602142334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,float16,0,0.3053013285001119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,0.2935466567675273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,0.49434133370717365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,0.2993706663449605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,fp8,0,0.29809067646662396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,fp8,0,0.2781013250350952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,0.29391467571258545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,float16,0,0.27747199932734173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.27084799607594806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,64,128,1,fp8,fp8,0,0.25677333275477093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.27297067642211914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.250271995862325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,float16,0,0.2780639926592509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.2727839946746826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,float16,0,0.2794933319091797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.2727893392244975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,fp8,0,0.2775946656862895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,64,128,1,fp8,fp8,0,0.2606400052706401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.2712533275286357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.25430933634440106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,fp8,0,0.279423991839091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,fp8,0,0.16157333056131998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,64,128,1,fp8,fp8,0,0.2613226572672526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.27322133382161456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.1602026621500651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.25474133094151813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,float16,0,0.164410670598348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.16246400276819864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,64,128,1,fp8,fp8,0,0.16265066464742026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.15869333346684775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.13446933031082153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,float16,0,0.14805333813031515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.14405333002408346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,fp8,0,0.14942933122316995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,64,128,1,fp8,fp8,0,0.13900267084439596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.14385066429773966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,float16,0,0.14781866470972696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.14589333534240723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,fp8,0,0.1476586659749349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,64,128,1,fp8,fp8,0,0.14071999986966452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.145578662554423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.1359946628411611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,float16,0,0.149536003669103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.14652799566586813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,float16,0,0.09073600172996521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,fp8,0,0.14988266428311667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,64,128,1,fp8,fp8,0,0.1414400041103363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.1458293298880259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.13757333159446716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.08866666754086812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,fp8,0,0.09057066837946574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,64,128,1,fp8,fp8,0,0.09129599730173747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.08861333131790161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,64,128,1,fp8,fp8,0,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.0904319981733958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,float16,0,0.08245866497357686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.0821973333756129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,fp8,0,0.08256533245245616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.08161066472530365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.07443733513355255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.08049599826335907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,float16,0,0.08241599798202515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.08042666812737782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,fp8,0,0.08308800061543782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,64,128,1,fp8,fp8,0,0.07835733393828075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.07617599765459697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,float16,0,0.0841919978459676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.08100266754627228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,fp8,0,0.08329066634178162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.049402669072151184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,64,128,1,fp8,fp8,0,0.07825066645940144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,64,128,1,fp8,fp8,0,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.08249066770076752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.07528000076611836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,fp8,0,0.05019199848175049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.049253334601720176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,float16,0,0.048911998669306435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,64,128,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,64,128,1,fp8,fp8,0,0.047242666284243263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,float16,0,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,64,128,1,fp8,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,float16,0,0.04779199759165446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,fp8,0,0.03428266694148382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,fp8,0,0.04960533479849497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,64,128,1,fp8,fp8,0,0.032602667808532715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,float16,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,fp8,0,0.034976000587145485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,64,128,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.031119999786218006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.033045334120591484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,64,128,1,fp8,fp8,0,0.03284800052642822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,float16,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,fp8,0,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,64,128,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,64,128,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,64,128,1,fp8,fp8,0,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,64,128,1,fp8,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,64,128,1,fp8,fp8,0,0.02250666668017705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.022656001150608063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,float16,0,1.1234453519185383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,1.125109354654948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,fp8,0,1.1201813220977783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,64,128,1,fp8,fp8,0,1.0521653493245442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,1.122437318166097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,1.0520906448364258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,float16,0,1.127786636352539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,1.1272160212198894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,fp8,0,1.1256053447723389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,64,128,1,fp8,fp8,0,1.0752800305684407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,1.1230933666229248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,1.078495979309082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,float16,0,1.1317120393117268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,1.1337333520253499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,fp8,0,1.1271413167317708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,64,128,1,fp8,fp8,0,1.0860479672749836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,float16,0,0.6267146666844686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,1.1282560030619304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,0.6257333358128866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,1.0934293270111084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,64,128,1,fp8,fp8,0,0.6233013470967611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,fp8,0,0.6136426528294882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,0.6134026845296224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,0.623855988184611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,fp8,0,0.5679839849472046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,float16,0,0.5685653289159139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,0.5688373247782389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,64,128,1,fp8,fp8,0,0.5308800141016642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,0.5670719941457113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,float16,0,0.5687626600265503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,0.5333866675694784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,0.5691999991734823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,fp8,0,0.5683573484420776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,64,128,1,fp8,fp8,0,0.5406773487726847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,0.5670666694641113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,0.5713066657384237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,0.5402933359146118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,float16,0,0.5698506832122803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,fp8,0,0.5690079927444458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,64,128,1,fp8,fp8,0,0.544597347577413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,0.5693759918212891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,float16,0,0.31963199377059937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,0.544106682141622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,0.320522665977478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,fp8,0,0.3145066698392232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,64,128,1,fp8,fp8,0,0.3185439904530843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,0.31516800324122113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,0.3200533390045166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,float16,0,0.2919519941012065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.29314666986465454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,fp8,0,0.2919999957084656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,64,128,1,fp8,fp8,0,0.27339200178782147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.2914453347524007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,64,128,1,fp8,fp8,0,0.279039998849233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.2730933427810669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,float16,0,0.2916053334871928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.29280000925064087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,fp8,0,0.2913706700007121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.29129600524902344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.2776053349177043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,float16,0,0.2922933300336202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.2943999965985616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,fp8,0,0.2922559976577759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,64,128,1,fp8,fp8,0,0.27989866336186725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,fp8,0,0.16602133711179098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.2924480040868123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.2796853383382161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,float16,0,0.17067732413609824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.1694986621538798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,64,128,1,fp8,fp8,0,0.17044800519943237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.16742400328318277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,64,128,1,fp8,fp8,0,0.14576533436775208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.16990399360656738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,float16,0,0.15235199530919394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.15433067083358765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.1521813372770945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,fp8,0,0.15281599760055542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.1525813341140747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.14448533455530801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,float16,0,0.15438933173815408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,fp8,0,0.15396799643834433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,64,128,1,fp8,fp8,0,0.14597866932551065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.1543839971224467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.14667733510335287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,float16,0,0.15410666664441428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.1530133287111918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,fp8,0,0.15382933616638184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,64,128,1,fp8,fp8,0,0.147189329067866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,64,128,1,fp8,fp8,0,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.15414399902025858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,float16,0,0.09120532870292664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.08301333089669545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.09218666950861613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,fp8,0,0.09130133191744487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.08373333017031352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.09050666292508443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.09437333544095357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,float16,0,0.08467732866605122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,fp8,0,0.08447466293970744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,64,128,1,fp8,fp8,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.08016533156236012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,float16,0,0.08435733119646709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.08348266283671062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,fp8,0,0.08478400111198425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,64,128,1,fp8,fp8,0,0.08026666442553203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.08311999837557475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.08065066734949748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,float16,0,0.08507200082143147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.08342933654785156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,fp8,0,0.08469333251317342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,64,128,1,fp8,fp8,0,0.08027733365694682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.08376000324885051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.0786186655362447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.0516533354918162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,float16,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.051957334081331887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,fp8,0,0.05057600140571594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,64,128,1,fp8,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,fp8,0,0.050160000721613564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,64,128,1,fp8,fp8,0,0.04720533390839895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.04758933186531067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,64,128,1,fp8,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.050160000721613564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,float16,0,0.05029866596062978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.04984533290068308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,fp8,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,64,128,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,64,128,1,fp8,fp8,0,0.04905599852403005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.04937066634496053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,float16,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,64,128,1,fp8,fp8,0,0.03002133220434189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,float16,0,0.031141333281993866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,64,128,1,fp8,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.03051200012365977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,64,128,1,fp8,fp8,0,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,64,128,1,fp8,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,64,128,1,fp8,fp8,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,64,128,1,fp8,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,float16,0,0.023770667612552643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,64,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,fp8,0,0.017850667238235474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,64,128,1,fp8,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,float16,0,0.8003040154774984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,0.8005812962849935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,fp8,0,0.7971733411153158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,64,128,1,fp8,fp8,0,0.7395733197530111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,0.7968479792277018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,0.7393120129903158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,float16,0,0.8000319798787435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,0.7989706993103027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,fp8,0,0.7991253534952799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,64,128,1,fp8,fp8,0,0.7442986965179443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,0.7965919971466064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,0.7443946997324625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,float16,0,0.8005119959513346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,0.8017226854960123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,fp8,0,0.7986986637115479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,64,128,1,fp8,fp8,0,0.7509013017018636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,float16,0,0.4333440065383911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,0.7982719739278158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,0.4341119925181071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,0.750058650970459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,fp8,0,0.4280800024668376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,64,128,1,fp8,fp8,0,0.42185068130493164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,0.4287946621576945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,0.4219199816385905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,float16,0,0.40621332327524823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.40640532970428467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,fp8,0,0.4053226709365845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,64,128,1,fp8,fp8,0,0.37664000193277997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.4051520029703776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.377893328666687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,64,128,1,fp8,fp8,0,0.3814133405685425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,float16,0,0.40637866655985516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.4060106674830119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,fp8,0,0.4046666622161865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.40594665209452313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.3813333511352539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,float16,0,0.40773332118988037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.40805331865946454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,fp8,0,0.40600534280141193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,64,128,1,fp8,fp8,0,0.3834773302078247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.4058239857355754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,float16,0,0.22644799947738647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.38277331988016766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.22543466091156006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,fp8,0,0.22445867458979288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,64,128,1,fp8,fp8,0,0.2218666672706604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.22351467609405518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,64,128,1,fp8,fp8,0,0.1971786618232727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.2220053275426229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,float16,0,0.20934933423995972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.20987200736999512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,fp8,0,0.20941867431004843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,fp8,0,0.211407999197642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.21142399311065674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.19718400637308756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,float16,0,0.21034133434295654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,float16,0,0.21030932664871216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.21173334121704102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,64,128,1,fp8,fp8,0,0.1981546680132548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.21073599656422934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.19814932346343994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.21160000562667847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,fp8,0,0.2113973299662272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,64,128,1,fp8,fp8,0,0.20068800449371338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.21041599909464517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.11724799871444702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.1994826594988505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,float16,0,0.11944533387819926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.11954133709271748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,fp8,0,0.11109866698582967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,fp8,0,0.11753066380818684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,64,128,1,fp8,fp8,0,0.11866133411725362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.11962667107582092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,float16,0,0.11142399907112122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.11071999867757161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.11145599683125813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,64,128,1,fp8,fp8,0,0.10446400443712871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,float16,0,0.1113866666952769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,fp8,0,0.11141332983970642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,64,128,1,fp8,fp8,0,0.1048426628112793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.11106133460998535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.10514133175214131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,float16,0,0.11116799712181091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,fp8,0,0.11136533816655476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,64,128,1,fp8,fp8,0,0.10495466987291972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.1111840009689331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.1048906644185384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,float16,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.06385066608587901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,fp8,0,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.06284800171852112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,64,128,1,fp8,fp8,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.0642986645301183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.06261333326498668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,float16,0,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,float16,0,0.06232533355553945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,fp8,0,0.063701331615448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,64,128,1,fp8,fp8,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.060218666990598045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,64,128,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,fp8,0,0.06654400130112965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,fp8,0,0.06330133477846782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.06070399781068166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,float16,0,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,64,128,1,fp8,fp8,0,0.060133333007494606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.06261333326498668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.06081599990526835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,64,128,1,fp8,fp8,0,0.0383093332250913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,fp8,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,64,128,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.0408693328499794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.03933866570393244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,float16,0,0.038202665746212006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.03835200021664301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,float16,0,0.03884266565243403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,64,128,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,float16,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,fp8,0,0.03842666745185852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,64,128,1,fp8,fp8,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,float16,0,0.026565333207448322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,64,128,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,float16,0,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,float16,0,0.02611200014750163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,64,128,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,64,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,float16,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,64,128,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,64,128,1,fp8,fp8,0,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,float16,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,float16,0,0.640229344367981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.6414933204650879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,fp8,0,0.6388053496678671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,64,128,1,fp8,fp8,0,0.5877333482106527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.6393226782480875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,0.5876160065333048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,float16,0,0.6380693515141805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.6378346681594849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,fp8,0,0.6404159863789877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,64,128,1,fp8,fp8,0,0.5894826650619507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.6397706667582194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,0.5910293261210123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,float16,0,0.6395253340403239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.639087994893392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,fp8,0,0.6388693253199259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.6398080190022787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,64,128,1,fp8,fp8,0,0.5908799966176351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,float16,0,0.3425439993540446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,0.5922773281733195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.34136001269022626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,fp8,0,0.3387093146642049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,64,128,1,fp8,fp8,0,0.32515732447306317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.3391626675923665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.3261973261833191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,float16,0,0.32607465982437134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.32767999172210693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,fp8,0,0.32785600423812866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,64,128,1,fp8,fp8,0,0.3031466603279114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.3264639973640442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.3026133378346761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,float16,0,0.326581339041392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.3267413377761841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,fp8,0,0.32630399862925213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,64,128,1,fp8,fp8,0,0.30294932921727497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.3271893262863159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,fp8,0,0.32757333914438885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.3039360046386719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.30421332518259686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,float16,0,0.3264159957567851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.3277333378791809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,64,128,1,fp8,fp8,0,0.30461867650349933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.3262773354848226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,float16,0,0.17653866608937582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.1767573356628418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,fp8,0,0.17634665966033936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,64,128,1,fp8,fp8,0,0.17068799336751303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.17594667275746664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.16986666123072305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.17274133364359537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.1572160025437673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,float16,0,0.16987200578053793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.17018133401870728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,fp8,0,0.16970133781433105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,fp8,0,0.1683680017789205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,64,128,1,fp8,fp8,0,0.15615999698638916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,float16,0,0.16936532656351724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.16851200660069784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,64,128,1,fp8,fp8,0,0.15745066603024802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.17015467087427774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.1571999986966451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,float16,0,0.16868799924850464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.16847999890645346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.09097066521644592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,fp8,0,0.1688906749089559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,64,128,1,fp8,fp8,0,0.15642666816711426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.1688213348388672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.15751999616622925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,float16,0,0.09041600426038106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.09058666229248047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,fp8,0,0.09025067090988159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,float16,0,0.0928000013033549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,fp8,0,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,64,128,1,fp8,fp8,0,0.08760533730189006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,float16,0,0.09097066521644592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.08922666311264038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,64,128,1,fp8,fp8,0,0.0848586658636729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.09060266613960266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.08452266454696655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.0904853343963623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,fp8,0,0.09027199943860371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.09101866682370503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,64,128,1,fp8,fp8,0,0.08593599994977315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,64,128,1,fp8,fp8,0,0.08648000160853068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.09084266424179077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.08649067083994548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,float16,0,0.0909440020720164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,fp8,0,0.0906826655069987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.09090133508046468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,64,128,1,fp8,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.0863146682580312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,float16,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.054144000013669334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,fp8,0,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.05157333115736643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,float16,0,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.05203199883302053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,fp8,0,0.053360000252723694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,64,128,1,fp8,fp8,0,0.04960533479849497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,float16,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.050570666790008545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.05194666484991709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,fp8,0,0.05342933535575867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,64,128,1,fp8,fp8,0,0.050661335388819374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,float16,0,0.05212800204753876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,float16,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,64,128,1,fp8,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,64,128,1,fp8,fp8,0,0.04965866605440775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.05269333223501841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.050069332122802734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,float16,0,0.0349386657277743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,64,128,1,fp8,fp8,0,0.03225066761175791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,float16,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.03442133218050003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,64,128,1,fp8,fp8,0,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.03182933231194814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.03431999931732813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,64,128,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,64,128,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.0349440003434817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,float16,0,0.022656001150608063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,64,128,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,float16,0,0.022826666633288067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.033861334125200905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,64,128,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,64,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,64,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,float16,0,0.5578986803690592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.55731201171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,fp8,0,0.5579626560211182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,64,128,1,fp8,fp8,0,0.5140906572341919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.5571946700414022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.5133066574732462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,float16,0,0.5574773152669271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.557749350865682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,fp8,0,0.5574933290481567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.5579839944839478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,64,128,1,fp8,fp8,0,0.5142293373743693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.5121013323465983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,float16,0,0.5577066739400228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.5582346518834432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,fp8,0,0.5570773283640543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,64,128,1,fp8,fp8,0,0.5157653490702311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,float16,0,0.29127999146779376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.5574986537297567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.5161973237991333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.2913653254508972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,fp8,0,0.2913600007692973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,64,128,1,fp8,fp8,0,0.27699732780456543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.29013333717981976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.27722134192784625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,float16,0,0.28355199098587036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.28516799211502075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,fp8,0,0.28520532449086505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,64,128,1,fp8,fp8,0,0.26242132981618244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.2844906648000081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.2627146641413371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,float16,0,0.2835093339284261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.2847946683565776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,fp8,0,0.28405867020289105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,64,128,1,fp8,fp8,0,0.2627786596616109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.2833919922510783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.26264532407124835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,float16,0,0.28386666377385456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.2852693398793538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,fp8,0,0.28538666168848675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,64,128,1,fp8,fp8,0,0.26313600937525433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.28428266445795697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.2621919910113017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,float16,0,0.14989866813023886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.14973866939544678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,fp8,0,0.14813333749771118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,64,128,1,fp8,fp8,0,0.1423520048459371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.14962666233380637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.14177599549293518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,float16,0,0.14828266700108847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.14808000127474466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,fp8,0,0.14786666631698608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,64,128,1,fp8,fp8,0,0.13807466626167297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.1483733355998993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.1395199994246165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,float16,0,0.1479626695315043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.14819199840227762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,fp8,0,0.14803199966748556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,64,128,1,fp8,fp8,0,0.13795733451843262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.1472640037536621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.1378720005353292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,float16,0,0.14819199840227762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.14803199966748556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,fp8,0,0.14755200346310934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,64,128,1,fp8,fp8,0,0.1400373379389445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.14815466602643332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.0812960018714269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.13798399766286215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,float16,0,0.08181866506735484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.08028799792130788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.08060800035794576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,fp8,0,0.08110933502515157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,64,128,1,fp8,fp8,0,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,64,128,1,fp8,fp8,0,0.07708266874154408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,float16,0,0.08057066798210144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.0805920014778773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,fp8,0,0.0804319977760315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,64,128,1,fp8,fp8,0,0.0757173349459966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.0806879997253418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.07680533329645793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,float16,0,0.0804906686147054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.0807360013326009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,fp8,0,0.08108800152937572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.08182933429876964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.07634133100509644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,float16,0,0.08016000191370647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,fp8,0,0.0791786660750707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,64,128,1,fp8,fp8,0,0.07630399862925212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,float16,0,0.04937600096066793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,64,128,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,float16,0,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.04770666857560476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,fp8,0,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,64,128,1,fp8,fp8,0,0.04558399816354116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.045237332582473755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,float16,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,fp8,0,0.04791999856630961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,64,128,1,fp8,fp8,0,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,float16,0,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,64,128,1,fp8,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,64,128,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.03025600065787633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,64,128,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,64,128,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.030762667457262676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,float16,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.03046400099992752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,64,128,1,fp8,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.03031466652949651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,float16,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,64,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,64,128,1,fp8,fp8,0,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.02179199953873952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.018016000588734944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,64,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,64,128,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,float16,0,3.654266675313314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,64,128,1,fp8,fp8,0,3.398672103881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,fp8,0,3.6800158818562827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,float16,0,3.6952107747395835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,fp8,0,3.7215518951416016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,64,128,1,fp8,fp8,0,3.4398508071899414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,float16,0,3.713695844014486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,fp8,0,3.7426878611246743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,64,128,1,fp8,fp8,0,3.4653971989949546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,21.021541595458984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,22.640660603841145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,22.723726908365887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,22.683583577473957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,21.040852864583332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,float16,0,3.7437012990315757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,22.752527872721355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,fp8,0,3.7723414103190103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,64,128,1,fp8,fp8,0,3.503925323486328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,float16,0,2.15010134379069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,22.726725260416668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,fp8,0,2.1987412770589194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,64,128,1,fp8,fp8,0,2.0785439809163413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,11.81826655069987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,21.08832550048828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,22.775311787923176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,float16,0,1.9032692909240723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,10.93517812093099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,11.859034220377604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,fp8,0,1.9188373883565266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,22.85131072998047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,64,128,1,fp8,fp8,0,1.77130126953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,21.126197814941406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,float16,0,1.9081333478291829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,fp8,0,1.9228159586588542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,22.81031036376953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,64,128,1,fp8,fp8,0,1.7812906901041667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,float16,0,1.9143360455830891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,11.468762715657553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,fp8,0,1.9319839477539062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,64,128,1,fp8,fp8,0,1.7921600341796875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,10.62115732828776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,11.466117858886719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,11.459882100423178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,float16,0,1.9280746777852376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,10.633909225463867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,11.487861633300781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,fp8,0,1.9457866350809734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,64,128,1,fp8,fp8,0,1.8078506787618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,11.496101379394531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,float16,0,1.1732213497161865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,fp8,0,1.204095999399821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,64,128,1,fp8,fp8,0,1.1465546290079753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,11.489828745524088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,10.660847981770834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,6.0795949300130205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,float16,0,1.0578986803690593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,11.52947743733724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,fp8,0,1.066362698872884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,5.625375747680664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,64,128,1,fp8,fp8,0,0.9946346282958984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,6.100197474161784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,10.666271845499674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,11.539727528889975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,float16,0,1.0611039797465007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,fp8,0,1.0697226524353027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,5.9007517496744795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,64,128,1,fp8,fp8,0,0.9982613722483317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,float16,0,1.065834681193034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,5.899146397908528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,5.470666885375977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,fp8,0,1.073210636774699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,5.914554595947266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,64,128,1,fp8,fp8,0,1.0027519861857097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,float16,0,1.0715306599934895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,5.901877085367839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,5.479808171590169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,fp8,0,1.0806559721628826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,5.911455790201823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,64,128,1,fp8,fp8,0,1.0140000184377034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,float16,0,0.7709759871164957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,fp8,0,0.7751946449279785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,64,128,1,fp8,fp8,0,0.7293333212534586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,5.47984504699707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,5.914229075113933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,5.933055877685547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,float16,0,0.7747466564178467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,3.2826881408691406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,fp8,0,0.7771680355072021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,5.495802561442058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,64,128,1,fp8,fp8,0,0.7317706743876139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,5.929146448771159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,3.0224107106526694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,3.2840372721354165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,3.2515894571940103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,float16,0,0.773354689280192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,fp8,0,0.7745280265808105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,64,128,1,fp8,fp8,0,0.7297386328379313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,3.2581119537353516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,float16,0,0.7729492982228597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,fp8,0,0.7730186780293783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,3.036149342854818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,3.25163205464681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,64,128,1,fp8,fp8,0,0.7300000190734863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,3.2603092193603516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,3.024874687194824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,float16,0,0.7775306701660156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,3.2529493967692056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,fp8,0,0.7740106582641602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,64,128,1,fp8,fp8,0,0.7314186890920004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,3.2592480977376304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,3.021765391031901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,3.260784149169922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,3.034501393636068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,3.2602453231811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,float16,0,2.7171360651652017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,fp8,0,2.741898536682129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,64,128,1,fp8,fp8,0,2.5195199648539224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,float16,0,2.7309494018554688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,fp8,0,2.7498931884765625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,64,128,1,fp8,fp8,0,2.5468053817749023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,float16,0,2.7409706115722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,fp8,0,2.764064153035482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,13.320453643798828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,12.341477711995443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,13.32428232828776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,64,128,1,fp8,fp8,0,2.5610987345377603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,13.331434885660807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,13.361829121907553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,float16,0,2.7638346354166665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,12.387504577636719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,fp8,0,2.7893813451131186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,13.373034159342447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,64,128,1,fp8,fp8,0,2.5925706227620444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,float16,0,1.6105759938557942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,fp8,0,1.648805300394694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,64,128,1,fp8,fp8,0,1.5563626289367676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,12.384148915608725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,13.389092763264975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,7.028687795003255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,float16,0,1.4291680653889973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,13.399829864501953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,fp8,0,1.4420746167500813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,7.062512079874675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,6.515434900919597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,64,128,1,fp8,fp8,0,1.332576036453247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,12.434608459472656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,13.419039408365885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,float16,0,1.4329172770182292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,64,128,1,fp8,fp8,0,1.3395573298136394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,fp8,0,1.4430185953776042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,6.7758026123046875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,float16,0,1.4397226969401042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,6.296639760335286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,fp8,0,1.4508213996887207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,6.780437469482422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,64,128,1,fp8,fp8,0,1.3463679949442546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,6.7725067138671875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,float16,0,1.4485492706298828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,6.297210693359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,6.787120183308919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,6.787109375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,fp8,0,1.4629333813985188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,64,128,1,fp8,fp8,0,1.3598559697469075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,float16,0,0.8820052941640218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,6.304186503092448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,fp8,0,0.9043786525726318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,6.80844243367513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,64,128,1,fp8,fp8,0,0.8638880252838135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,float16,0,0.797216018040975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,3.6329065958658853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,6.8125762939453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,fp8,0,0.8028159936269125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,64,128,1,fp8,fp8,0,0.7511040369669596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,6.823488235473633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,6.310096104939778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,3.3820212682088218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,3.656266530354818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,float16,0,0.7993120352427164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,3.517807960510254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,fp8,0,0.8045547008514404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,64,128,1,fp8,fp8,0,0.752570629119873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,float16,0,0.8034453392028809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,3.517418543497721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,3.2741546630859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,fp8,0,0.8094773292541504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,3.5131200154622397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,64,128,1,fp8,fp8,0,0.7593973477681478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,float16,0,0.8075040181477865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,3.2742560704549155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,3.5244693756103516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,fp8,0,0.8154719670613607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,3.5198612213134766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,64,128,1,fp8,fp8,0,0.7644960085550944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,float16,0,0.5830560127894083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,3.5265706380208335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,3.2812426884969077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,fp8,0,0.5844000180562338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,64,128,1,fp8,fp8,0,0.5517280101776123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,3.5293280283610025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,float16,0,0.5840959946314493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,2.002021312713623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,fp8,0,0.5861119826634725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,3.282954533894857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,3.5432799657185874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,64,128,1,fp8,fp8,0,0.5514986515045166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,1.8575146993001301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,2.002783934275309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,1.992202599843343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,float16,0,0.5841226577758789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,fp8,0,0.5841546853383383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,64,128,1,fp8,fp8,0,0.552677313486735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,1.9951039950052898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,1.8514773050944011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,float16,0,0.5827039877573649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,fp8,0,0.585749348004659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,1.9891146024068196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,64,128,1,fp8,fp8,0,0.551914652188619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,1.987280050913493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,1.856335957845052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,float16,0,0.584821343421936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,1.9892640113830566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,fp8,0,0.5840426683425903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,64,128,1,fp8,fp8,0,0.5499893426895142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,1.8593279520670574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,1.988298734029134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,1.9944799741109211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,2.0013866424560547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,1.8585972785949707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,float16,0,2.2593119939168296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,fp8,0,2.275872071584066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,64,128,1,fp8,fp8,0,2.0947200457255044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,float16,0,2.2672106424967446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,fp8,0,2.2843519846598306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,64,128,1,fp8,fp8,0,2.1069493293762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,float16,0,2.2732693354288735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,9.563568115234375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,8.884047826131185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,9.580026626586914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,fp8,0,2.293402671813965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,9.591023763020834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,64,128,1,fp8,fp8,0,2.1200106938680015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,9.603546778361002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,float16,0,2.293008009592692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,8.90170669555664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,fp8,0,2.3141706784566245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,9.601184209187826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,64,128,1,fp8,fp8,0,2.1463680267333984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,float16,0,1.3414079348246257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,fp8,0,1.373802661895752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,64,128,1,fp8,fp8,0,1.2988906701405842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,8.892768224080404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,9.62987200419108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,5.085498809814453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,float16,0,1.1909333070119221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,9.648645401000977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,fp8,0,1.2012053330739338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,8.940682729085287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,64,128,1,fp8,fp8,0,1.1131573518117268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,5.121477444966634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,9.660933176676432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,4.725791931152344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,float16,0,1.1937867005666096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,fp8,0,1.205125331878662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,4.8949174880981445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,64,128,1,fp8,fp8,0,1.116426706314087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,4.899882634480794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,float16,0,1.2013440132141113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,4.54585075378418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,4.892352104187012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,fp8,0,1.2113920052846272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,64,128,1,fp8,fp8,0,1.1250773270924885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,4.915013313293457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,4.5420427322387695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,float16,0,1.2089920043945312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,4.896517435709636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,64,128,1,fp8,fp8,0,1.1359466711680095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,fp8,0,1.2222987016042073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,float16,0,0.7374773025512695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,fp8,0,0.7576479911804199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,4.916960080464681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,4.5496320724487305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,64,128,1,fp8,fp8,0,0.7232159773508707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,4.920538584391276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,float16,0,0.6659200191497803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,2.6420000394185386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,4.568085352579753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,4.926911989847819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,fp8,0,0.6706613699595133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,64,128,1,fp8,fp8,0,0.6294399897257487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,2.4765332539876304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,2.6648853619893393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,2.5477120081583657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,float16,0,0.6672853628794352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,fp8,0,0.6727253595987955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,64,128,1,fp8,fp8,0,0.6310773293177286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,2.5512693723042807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,float16,0,0.6705599625905355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,2.379765351613363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,fp8,0,0.6762879689534506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,2.553114732106527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,64,128,1,fp8,fp8,0,0.6339679956436157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,2.3759519259134927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,2.5560480753580728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,float16,0,0.6755786736806234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,2.558634599049886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,fp8,0,0.6807999610900879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,64,128,1,fp8,fp8,0,0.6393119891484579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,2.559962590535482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,float16,0,0.49007999897003174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,2.3849172592163086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,fp8,0,0.4902240037918091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,2.568661371866862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,64,128,1,fp8,fp8,0,0.46531200408935547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,1.4793119430541992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,2.390240033467611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,2.5717652638753257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,float16,0,0.49028801918029785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,fp8,0,0.4925119876861572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,64,128,1,fp8,fp8,0,0.4635733366012573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,1.4816800753275554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,1.375157356262207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,1.4709547360738118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,float16,0,0.49051201343536377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,fp8,0,0.49003732204437256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,64,128,1,fp8,fp8,0,0.4631679852803548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,1.4734080632527669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,1.3696106274922688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,1.4691893259684246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,float16,0,0.49208001295725506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,fp8,0,0.491866668065389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,64,128,1,fp8,fp8,0,0.46564801534016925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,1.4728479385375977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,1.3728639284769695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,float16,0,0.4908213218053182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,1.468549410502116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,fp8,0,0.49191999435424805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,64,128,1,fp8,fp8,0,0.46431998411814374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,1.4716800053914387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,1.3722400665283203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,1.476912021636963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,1.4753813743591309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,1.3722400665283203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,float16,0,3.539221445719401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,64,128,1,fp8,fp8,0,3.290687878926595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,fp8,0,3.572341283162435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,float16,0,3.5896212259928384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,64,128,1,fp8,fp8,0,3.3352320988972983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,fp8,0,3.6095199584960938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,float16,0,3.6038026809692383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,11.830384572347006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,12.758164723714193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,12.763498942057291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,12.786688486735025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,fp8,0,3.6293652852376304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,64,128,1,fp8,fp8,0,3.3638505935668945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,12.835455576578775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,11.877812703450521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,float16,0,3.636826515197754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,fp8,0,3.6682186126708984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,12.821397145589193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,64,128,1,fp8,fp8,0,3.4051411946614585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,float16,0,2.0551625887552896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,11.889466603597006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,fp8,0,2.098618666330973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,12.876859029134115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,64,128,1,fp8,fp8,0,1.9820000330607097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,6.7609602610270185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,float16,0,1.8044373194376628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,12.894954681396484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,fp8,0,1.8200747172037761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,64,128,1,fp8,fp8,0,1.6763520240783691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,12.91308848063151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,11.94162623087565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,6.275386810302734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,6.807781219482422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,float16,0,1.8089280128479004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,6.429226557413737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,fp8,0,1.8242932955423992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,64,128,1,fp8,fp8,0,1.6819146474202473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,6.457632064819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,5.974159876505534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,float16,0,1.8155733744303386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,fp8,0,1.8325866063435872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,64,128,1,fp8,fp8,0,1.692629337310791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,6.454522450764974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,5.9847838083903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,float16,0,1.8285973866780598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,6.464613596598308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,fp8,0,1.8474399248758953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,64,128,1,fp8,fp8,0,1.7141547203063965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,6.464122772216797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,float16,0,1.0745973587036133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,5.986629486083984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,fp8,0,1.1009066899617512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,6.47769037882487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,64,128,1,fp8,fp8,0,1.042080005009969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,6.489109039306641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,float16,0,0.9552053610483805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,3.4599412282307944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,6.016096115112305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,fp8,0,0.9638079802195231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,6.5010560353597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,64,128,1,fp8,fp8,0,0.895914634068807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,3.222288131713867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,3.4761759440104165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,3.2980801264444985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,float16,0,0.9587093194325765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,fp8,0,0.9662933349609375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,64,128,1,fp8,fp8,0,0.8976319630940756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,3.3118985493977866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,float16,0,0.9619893232981364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,3.0719467798868814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,fp8,0,0.9713653723398844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,3.301157315572103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,64,128,1,fp8,fp8,0,0.903866688410441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,3.3105812072753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,3.070927937825521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,float16,0,0.9692426522572836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,3.3157707850138345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,fp8,0,0.9809866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,64,128,1,fp8,fp8,0,0.9122986793518066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,float16,0,0.5939253171284994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,3.321589469909668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,3.0818986892700195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,fp8,0,0.6086826721827189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,64,128,1,fp8,fp8,0,0.5811200141906738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,3.3236265182495117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,1.8163092931111653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,float16,0,0.537173350652059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,3.088015874226888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,fp8,0,0.5395626624425253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,64,128,1,fp8,fp8,0,0.5070079962412516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,3.3350292841593423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,1.8270079294840496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,1.701200008392334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,1.738111972808838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,float16,0,0.5373599926630656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,fp8,0,0.5414933363596598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,64,128,1,fp8,fp8,0,0.5077173312505087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,1.7409013112386067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,1.622634728749593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,float16,0,0.5405120054880778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,1.7437973022460938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,fp8,0,0.5448746681213379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,64,128,1,fp8,fp8,0,0.5105760097503662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,1.6233919461568196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,1.7469013532002766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,float16,0,0.5429120063781738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,1.7433546384175618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,fp8,0,0.5474613507588705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,64,128,1,fp8,fp8,0,0.5144639809926351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,1.7500905990600586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,1.623813311258952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,float16,0,0.393887996673584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,1.7510239283243816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,fp8,0,0.39401598771413165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,64,128,1,fp8,fp8,0,0.3734346628189087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,1.0336000124613445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,1.758853276570638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,float16,0,0.3917493422826131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,1.6329387029012044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,1.0333706537882488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,0.9598293304443359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,fp8,0,0.39323198795318604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,64,128,1,fp8,fp8,0,0.3728959957758586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,1.0268747011820476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,float16,0,0.39210665225982666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,1.0272586345672607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,0.9620160261789957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,fp8,0,0.3938773473103841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,64,128,1,fp8,fp8,0,0.37241601943969727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,1.024890661239624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,float16,0,0.3915199836095174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,1.0278027057647705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,0.9572746753692627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,64,128,1,fp8,fp8,0,0.3734133243560791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,fp8,0,0.3938506841659546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,1.0304160118103027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,float16,0,0.3931359847386678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,1.0284266471862793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,fp8,0,0.39396266142527264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,0.9640586376190186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,64,128,1,fp8,fp8,0,0.3732000192006429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,1.0287840366363525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,1.0332640012105305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,0.9596853256225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,float16,0,2.634533405303955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,64,128,1,fp8,fp8,0,2.437114715576172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,fp8,0,2.6539947191874185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,float16,0,2.647754669189453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,fp8,0,2.666522661844889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,64,128,1,fp8,fp8,0,2.4639892578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,7.701914469401042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,7.142672220865886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,7.70854377746582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,float16,0,2.656335989634196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,7.716208140055339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,fp8,0,2.685781478881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,64,128,1,fp8,fp8,0,2.485775947570801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,7.734202702840169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,7.162725448608398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,float16,0,2.68340269724528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,7.733386357625325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,64,128,1,fp8,fp8,0,2.5103467305501304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,fp8,0,2.708693186442057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,float16,0,1.538010597229004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,7.18609619140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,fp8,0,1.5721759796142578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,7.752309163411458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,64,128,1,fp8,fp8,0,1.4833973248799641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,float16,0,1.3533066113789876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,4.135024070739746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,7.7811838785807295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,7.806298573811849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,7.214922587076823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,fp8,0,1.3646559715270996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,64,128,1,fp8,fp8,0,1.258639971415202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,3.8636000951131186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,4.162752151489258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,float16,0,1.3580479621887207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,fp8,0,1.3681972821553547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,3.9010187784830728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,64,128,1,fp8,fp8,0,1.2674400011698406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,3.920005480448405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,3.6287625630696616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,float16,0,1.3627467155456543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,fp8,0,1.3773172696431477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,3.9086294174194336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,64,128,1,fp8,fp8,0,1.2735679944356282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,3.9210240046183267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,3.635045369466146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,float16,0,1.374560038248698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,3.9207785924275718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,fp8,0,1.3903466860453289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,64,128,1,fp8,fp8,0,1.286197344462077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,3.938074747721354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,float16,0,0.8080159823099772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,3.6399307250976562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,fp8,0,0.8282453219095866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,3.9394238789876304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,64,128,1,fp8,fp8,0,0.785919984181722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,2.1302453676859536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,float16,0,0.7190986474355062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,3.9612372716267905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,3.6604906717936196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,fp8,0,0.7258240381876627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,2.1440745989481607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,1.9921120007832844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,64,128,1,fp8,fp8,0,0.676245371500651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,2.014592011769613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,float16,0,0.7219040393829346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,fp8,0,0.7295040289560953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,64,128,1,fp8,fp8,0,0.6782240072886149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,2.0206079483032227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,1.8801652590433757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,2.021418730417887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,float16,0,0.7242399851481119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,fp8,0,0.7313546339670817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,64,128,1,fp8,fp8,0,0.6806720097859701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,2.0293973286946616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,1.8784693082173665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,2.026144027709961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,float16,0,0.729312022527059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,fp8,0,0.7378559907277426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,2.0347466468811035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,64,128,1,fp8,fp8,0,0.6889333724975586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,1.8896586100260417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,float16,0,0.4490933418273926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,2.034154733022054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,fp8,0,0.46041067441304523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,64,128,1,fp8,fp8,0,0.4410933256149292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,1.129413366317749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,2.0439252853393555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,float16,0,0.4039520025253296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,1.8914292653401692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,1.0636800130208333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,1.1422399679819744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,fp8,0,0.40667200088500977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,64,128,1,fp8,fp8,0,0.38355199495951336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,1.0773173173268635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,float16,0,0.40531198183695477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,1.0771626631418865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,1.0060106913248699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,fp8,0,0.40778664747873944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,64,128,1,fp8,fp8,0,0.3843839963277181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,1.0802079836527507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,float16,0,0.4065013329188029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,fp8,0,0.4103413422902425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,1.0814932982126872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,1.008090655008952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,64,128,1,fp8,fp8,0,0.3871946732203166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,1.0822986761728923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,float16,0,0.41042133172353107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,1.0081600348154705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,1.087077299753825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,fp8,0,0.4144373337427775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,64,128,1,fp8,fp8,0,0.3901439905166626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,1.0876373449961345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,float16,0,0.3018239935239156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,fp8,0,0.3028266628583272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,1.0901386737823486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,0.6624053319295248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,0.618559996287028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,1.012938658396403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,64,128,1,fp8,fp8,0,0.2855253418286641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,0.6672373612721761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,float16,0,0.29920534292856854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,fp8,0,0.30129067103068036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,0.6586346626281738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,0.6202986637751261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,0.658240000406901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,64,128,1,fp8,fp8,0,0.28514132897059125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,float16,0,0.29808000723520917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,fp8,0,0.29948266347249347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,float16,0,0.29977599779764813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,0.6599893172581991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,fp8,0,0.3001386721928914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,0.6622453530629476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,64,128,1,fp8,fp8,0,0.28542933861414593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,0.6190986633300781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,0.6583306789398193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,64,128,1,fp8,fp8,0,0.2874400019645691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,0.6591413418451945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,float16,0,0.30056534210840863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,0.6185973485310873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,fp8,0,0.30004799365997314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,0.664954662322998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,0.6197706858317057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,64,128,1,fp8,fp8,0,0.28754132986068726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,0.6593439976374308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,float16,0,3.4871412913004556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,64,128,1,fp8,fp8,0,3.22977606455485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,fp8,0,3.5122238794962564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,float16,0,3.5270506540934243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,64,128,1,fp8,fp8,0,3.2764320373535156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,fp8,0,3.553882598876953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,7.7188371022542315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,7.183008193969727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,7.755125045776367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,7.764181137084961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,float16,0,3.543621381123861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,64,128,1,fp8,fp8,0,3.3017705281575522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,fp8,0,3.5665918986002603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,7.806277592976888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,7.223306655883789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,float16,0,3.577312151590983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,7.809754689534505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,64,128,1,fp8,fp8,0,3.3450133005777993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,fp8,0,3.6022987365722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,7.241690953572591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,7.832149505615234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,float16,0,2.003023942311605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,fp8,0,2.037557284037272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,64,128,1,fp8,fp8,0,1.9225172996520996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,7.860917409261067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,4.185680071512858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,7.877525329589844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,float16,0,1.7485334078470867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,4.220207850138347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,fp8,0,1.7644747098286946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,3.9227733612060547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,64,128,1,fp8,fp8,0,1.6193013191223145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,7.289445241292317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,3.8799893061319985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,float16,0,1.7530934015909831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,fp8,0,1.7674773534138997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,3.896128018697103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,3.609583854675293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,64,128,1,fp8,fp8,0,1.6289119720458984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,3.896575927734375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,float16,0,1.7611039479573567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,fp8,0,1.7804320653279622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,3.9031359354654946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,64,128,1,fp8,fp8,0,1.6394880612691243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,3.619082768758138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,3.9026400248209634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,float16,0,1.7799092928568523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,64,128,1,fp8,fp8,0,1.6581333478291829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,fp8,0,1.795178731282552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,3.6253013610839844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,float16,0,1.0291039943695068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,3.9266347885131836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,64,128,1,fp8,fp8,0,0.9928692976633707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,2.1279093424479165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,fp8,0,1.0493226846059163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,2.1572267214457193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,3.9295199712117515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,3.9467786153157554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,3.6437225341796875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,float16,0,0.9062346617380778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,fp8,0,0.9127039909362793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,2.0066986083984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,64,128,1,fp8,fp8,0,0.8450453281402588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,1.9857014020284016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,float16,0,0.9086080392201742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,fp8,0,0.9167733192443848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,64,128,1,fp8,fp8,0,0.8488372961680094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,1.9977493286132812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,1.8531360626220703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,1.9924853642781575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,float16,0,0.9123733043670654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,2.00328000386556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,fp8,0,0.9228479862213135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,64,128,1,fp8,fp8,0,0.8529280026753744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,1.8540372848510742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,1.9972052574157715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,float16,0,0.918725331624349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,fp8,0,0.9297440052032471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,2.0084427197774253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,64,128,1,fp8,fp8,0,0.8627893129984537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,1.85970671971639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,float16,0,0.5435680150985718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,2.0119519233703613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,fp8,0,0.5570559899012247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,64,128,1,fp8,fp8,0,0.5282986561457316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,2.017903963724772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,1.1108373006184895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,1.8678720792134602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,float16,0,0.4834933280944824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,1.1227467060089111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,1.0471733411153157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,fp8,0,0.4877440134684245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,64,128,1,fp8,fp8,0,0.455130656560262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,1.0392693678538005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,float16,0,0.4854559898376465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,1.0444640318552654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,0.9712639649709066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,fp8,0,0.48948800563812256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,64,128,1,fp8,fp8,0,0.45742400487263996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,1.04094401995341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,float16,0,0.48816001415252686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,1.0438613096872966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,0.9730133215586344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,fp8,0,0.49185601870218915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,64,128,1,fp8,fp8,0,0.459663987159729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,1.045514663060506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,float16,0,0.4919786850611369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,1.0502026875813801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,0.9763680299123129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,fp8,0,0.4962559938430786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,64,128,1,fp8,fp8,0,0.4636106491088867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,1.0495093663533528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,float16,0,0.30500266949335736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,1.0537226994832356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,fp8,0,0.3115413387616475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,0.6024373372395834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,0.6091839869817098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,0.5715039968490601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,0.9829493363698324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,fp8,0,0.2730026642481486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,0.5622613430023193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,64,128,1,fp8,fp8,0,0.26053333282470703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,64,128,1,fp8,fp8,0,0.2995306650797526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,float16,0,0.27100799481074017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,float16,0,0.2691520055135091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,0.563151995340983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,0.5315093199412028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,fp8,0,0.27214399973551434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,0.5661173264185587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,0.5645279884338379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,64,128,1,fp8,fp8,0,0.2608426610628764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,float16,0,0.27138666311899823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,0.5332053502400717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,fp8,0,0.2746933301289876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,64,128,1,fp8,fp8,0,0.26291733980178833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,float16,0,0.2767573396364848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,0.5687199831008911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,fp8,0,0.2787040074666341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,0.565829316775004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,0.5342719952265421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,0.570741335550944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,64,128,1,fp8,fp8,0,0.2657173275947571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,float16,0,0.20492267608642578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,0.5746719837188721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,0.5374506711959839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,0.36841599146525067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,fp8,0,0.20560532808303833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,64,128,1,fp8,fp8,0,0.19520533084869385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,0.3714880148569743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,0.3432533343633016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,float16,0,0.20087466637293497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.36721599102020264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,fp8,0,0.20028799772262573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,64,128,1,fp8,fp8,0,0.19292267163594565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.36495999495188397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,0.34483734766642254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,float16,0,0.19979200760523477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,fp8,0,0.20314133167266846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.36294933160146076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,64,128,1,fp8,fp8,0,0.19125866889953613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.3673493464787801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,0.3429280122121175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,float16,0,0.2032053271929423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.36254934469858807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,fp8,0,0.20324265956878662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,64,128,1,fp8,fp8,0,0.191103994846344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.3675893147786458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.364682674407959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,0.34485868612925213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,float16,0,0.20188800493876138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,fp8,0,0.2014346718788147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,64,128,1,fp8,fp8,0,0.19302932421366373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.3683946530024211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,0.3418346643447876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,float16,0,2.58734401067098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,64,128,1,fp8,fp8,0,2.392954667409261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,fp8,0,2.6081013679504395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,float16,0,2.600378672281901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,4.833642641703288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,4.491605440775554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,4.853935877482097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,fp8,0,2.623568058013916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,64,128,1,fp8,fp8,0,2.4180960655212402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,4.862069447835286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,float16,0,2.61517333984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,4.879791895548503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,fp8,0,2.6365493138631186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,4.5174986521403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,64,128,1,fp8,fp8,0,2.4375839233398438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,4.868837356567383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,float16,0,2.63700262705485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,4.545200030008952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,4.897765477498372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,64,128,1,fp8,fp8,0,2.4613439242045083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,fp8,0,2.659733295440674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,float16,0,1.4997545878092449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,fp8,0,1.529146671295166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,4.907509485880534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,64,128,1,fp8,fp8,0,1.4389173189798992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,2.665093262990316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,4.928943951924642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,2.687391916910807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,float16,0,1.3143146832784016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,fp8,0,1.3251466751098633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,4.565765380859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,2.504997412363688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,64,128,1,fp8,fp8,0,1.219333330790202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,2.449567953745524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,float16,0,1.319808006286621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,fp8,0,1.3271199862162273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,64,128,1,fp8,fp8,0,1.2224799791971843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,2.4613706270853677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,2.275152047475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,2.4532480239868164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,float16,0,1.325984001159668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,2.4682933489481607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,2.2861760457356772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,fp8,0,1.337264060974121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,64,128,1,fp8,fp8,0,1.2317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,2.4588586489359536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,float16,0,1.3367306391398113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,2.473013401031494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,fp8,0,1.3489440282185872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,2.291930675506592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,64,128,1,fp8,fp8,0,1.2446666558583577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,float16,0,0.7743893464406332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,2.4798986117045083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,2.493504047393799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,64,128,1,fp8,fp8,0,0.7477333545684814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,fp8,0,0.7908853689829508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,1.3644372622172039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,2.3056106567382812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,1.3808959325154622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,float16,0,0.6825066407521566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,1.291269302368164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,fp8,0,0.6878506342569987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,64,128,1,fp8,fp8,0,0.6360693375269572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,1.2598346869150798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,float16,0,0.6840960184733073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,1.1761120160420735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,1.2657439708709717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,fp8,0,0.6909493605295817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,64,128,1,fp8,fp8,0,0.6394559939702352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,1.2646133104960124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,1.2694346904754639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,float16,0,0.6874773502349854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,1.180239995320638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,fp8,0,0.6946240266164144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,64,128,1,fp8,fp8,0,0.6437546809514364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,1.2661173343658447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,float16,0,0.6930720011393229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,1.2767626444498699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,1.183498700459798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,fp8,0,0.7008586724599203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,64,128,1,fp8,fp8,0,0.6499093373616537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,1.2752587000528972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,float16,0,0.4124000072479248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,1.2810719807942708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,fp8,0,0.42260265350341797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,0.7183893521626791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,64,128,1,fp8,fp8,0,0.40170133113861084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,1.1906507015228271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,float16,0,0.3635840018590291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,0.727674643198649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,fp8,0,0.3670773506164551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,64,128,1,fp8,fp8,0,0.34542401631673175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,0.6818719704945883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,0.6656906604766846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,0.6671199798583984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,float16,0,0.3649653196334839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,0.6254773139953613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,fp8,0,0.3683040142059326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,64,128,1,fp8,fp8,0,0.3476853370666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,0.6642719904581705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,0.668399969736735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,float16,0,0.36793601512908936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,0.627349336942037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,fp8,0,0.3718453248341878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,0.667738676071167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,64,128,1,fp8,fp8,0,0.34891200065612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,0.6727626323699951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,float16,0,0.3721706469853719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,0.6274720033009847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,fp8,0,0.37651201089223224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,0.6725866794586182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,64,128,1,fp8,fp8,0,0.3524693250656128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,float16,0,0.2339573303858439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,64,128,1,fp8,fp8,0,0.22953067223230997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,0.6762932936350504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,0.6313973267873129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,0.39612265427907306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,fp8,0,0.23959465821584067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,0.4000320037206014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,0.37725865840911865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,float16,0,0.205402672290802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,0.3641013304392497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,fp8,0,0.20733332633972168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,64,128,1,fp8,fp8,0,0.19965867201487222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,0.36636801560719806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,0.3468693494796753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,float16,0,0.20550400018692017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,0.348906675974528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,0.36397333939870197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,fp8,0,0.2076586683591207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,64,128,1,fp8,fp8,0,0.19937600692113241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,0.3670773506164551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,float16,0,0.20746133724848428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,0.36690131823221844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,fp8,0,0.2094879945119222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,64,128,1,fp8,fp8,0,0.20163200298945108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,0.36745067437489826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,0.34881067276000977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,float16,0,0.21037866671880087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,0.3673653205235799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,fp8,0,0.21153066555658975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,64,128,1,fp8,fp8,0,0.20524267355600992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,0.37057065963745117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,0.35227731863657635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.24971733490626016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,float16,0,0.1616266667842865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.25060800711313885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,fp8,0,0.16247466206550598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,64,128,1,fp8,fp8,0,0.15249600013097128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.23607999086380005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,float16,0,0.1572373310724894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.246671994527181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,fp8,0,0.1574560006459554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,64,128,1,fp8,fp8,0,0.1504586637020111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.24596265951792398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.23254932959874472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,float16,0,0.1564533313115438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.24807999531428018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,fp8,0,0.15659200151761374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,64,128,1,fp8,fp8,0,0.15004799763361612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.24707200129826865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.2329439918200175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,float16,0,0.15818132956822714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.24517333507537842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,fp8,0,0.15819199879964194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,64,128,1,fp8,fp8,0,0.14828800161679587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.24627200762430826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.23233066002527872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,float16,0,0.1583466629187266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.24820800622304282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,fp8,0,0.15822399655977884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,64,128,1,fp8,fp8,0,0.15068266789118448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.24447466929753622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.23199999332427979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,float16,0,3.446645418802897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,64,128,1,fp8,fp8,0,3.1766026814778647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,fp8,0,3.4730507532755532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,float16,0,3.498005231221517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,5.207375844319661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,4.8227841059366865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,5.223711967468262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,64,128,1,fp8,fp8,0,3.2000160217285156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,fp8,0,3.5023625691731772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,5.246506690979004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,float16,0,3.4987945556640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,5.257589340209961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,fp8,0,3.5220371882120767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,4.8613332112630205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,64,128,1,fp8,fp8,0,3.221989313761393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,5.246613184611003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,float16,0,3.573391914367676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,5.272704124450684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,64,128,1,fp8,fp8,0,3.2691733042399087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,fp8,0,3.5641492207845054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,4.87497615814209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,5.3231201171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,float16,0,1.9760425885518391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,64,128,1,fp8,fp8,0,1.8944586118062336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,fp8,0,2.006218592325846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,4.9123945236206055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,5.315125465393066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,2.886656125386556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,2.9149599075317383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,float16,0,1.718997319539388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,2.730463981628418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,fp8,0,1.7322826385498047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,64,128,1,fp8,fp8,0,1.587648073832194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,2.599397341410319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,float16,0,1.7250560124715169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,2.4181013107299805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,2.6103893915812173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,64,128,1,fp8,fp8,0,1.5995306968688965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,fp8,0,1.739733378092448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,2.612309296925863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,2.618768056233724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,float16,0,1.7321707407633464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,2.428938706715902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,fp8,0,1.748261292775472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,64,128,1,fp8,fp8,0,1.6104532877604167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,2.6184800465901694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,2.4418506622314453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,2.6354880332946777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,float16,0,1.7461172739664714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,fp8,0,1.7653600374857585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,64,128,1,fp8,fp8,0,1.624901294708252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,2.6341493924458823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,float16,0,1.0039199988047283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,fp8,0,1.023045301437378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,2.6549599965413413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,2.4546507199605307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,64,128,1,fp8,fp8,0,0.9647839864095052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,1.4678719838460286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,1.4823466936747234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,float16,0,0.8785173098246256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,1.387957255045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,fp8,0,0.8864160378774008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,1.3245386282602947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,64,128,1,fp8,fp8,0,0.8142560323079427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,float16,0,0.8798666795094808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,1.334810733795166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,1.2374560038248699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,64,128,1,fp8,fp8,0,0.8212746779123942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,fp8,0,0.8894399801890055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,1.3309333324432373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,1.335034688313802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,1.2407200336456299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,float16,0,0.8861333529154459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,fp8,0,0.8938720226287842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,64,128,1,fp8,fp8,0,0.8262560367584229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,1.334917386372884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,float16,0,0.8918560345967611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,1.3406933148701985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,1.2481119632720947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,fp8,0,0.9027893543243408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,64,128,1,fp8,fp8,0,0.833733320236206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,1.3439839680989583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,float16,0,0.5208106835683187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,1.2555092970530193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,1.353925387064616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,0.7594827016194662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,fp8,0,0.5316853523254395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,64,128,1,fp8,fp8,0,0.50273064772288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,0.7709493637084961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,float16,0,0.4572319984436035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,0.7202666600545248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,fp8,0,0.4618133306503296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,0.6884160041809082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,64,128,1,fp8,fp8,0,0.4291679859161377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,float16,0,0.45924798647562665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,0.6932746569315592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,0.6456960042317709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,fp8,0,0.4629280169804891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,0.6892053286234537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,64,128,1,fp8,fp8,0,0.43164265155792236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,0.6920426686604818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,0.6485546827316284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,fp8,0,0.46783467133839923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,0.6932319800059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,float16,0,0.46299199263254803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,64,128,1,fp8,fp8,0,0.4347626765569051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,0.6998399893442789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,0.6503573258717855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,float16,0,0.4666666587193807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,fp8,0,0.4709920088450114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,0.6991093158721924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,0.7018667062123617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,64,128,1,fp8,fp8,0,0.43727465470631915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,fp8,0,0.28699199358622235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,float16,0,0.2791253328323364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,0.6556959946950277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,0.404149333635966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,64,128,1,fp8,fp8,0,0.27350399891535443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,0.41015466054280597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,float16,0,0.24353599548339844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,0.3867093324661255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,0.36453866958618164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,fp8,0,0.246394673983256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,64,128,1,fp8,fp8,0,0.23461333910624185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,0.3677440087000529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,float16,0,0.24437866608301798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,0.3487786849339803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,0.365013321240743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,fp8,0,0.24687467018763223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,64,128,1,fp8,fp8,0,0.23460266987482706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,0.36901334921518963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,0.3492853244145711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,float16,0,0.2452053427696228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,0.3675626516342163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,fp8,0,0.24824533859888712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,64,128,1,fp8,fp8,0,0.23625065883000693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,0.372378667195638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,64,128,1,fp8,fp8,0,0.24041066567103067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,0.35097066561381024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,float16,0,0.24889600276947021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,0.37276800473531085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,fp8,0,0.2521333297093709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,0.37398401896158856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,0.3533066511154175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,float16,0,0.15851733088493347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.22196799516677856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,float16,0,0.13766933480898538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.22851200898488364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,fp8,0,0.16267733772595724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,64,128,1,fp8,fp8,0,0.15785066286722818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.23229867219924927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.20571200052897134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,fp8,0,0.1395626664161682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,64,128,1,fp8,fp8,0,0.13196800152460733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.207914670308431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.1932906707127889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,float16,0,0.13798399766286215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.2060533364613851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,fp8,0,0.13802133003870645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,64,128,1,fp8,fp8,0,0.1328426698843638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.20732800165812174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.19539733727773032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,float16,0,0.13766933480898538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.20653865734736124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,fp8,0,0.139765332142512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,64,128,1,fp8,fp8,0,0.13405332962671915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.2076853315035502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.19818667570749918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,float16,0,0.13938132921854654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.2079626719156901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,fp8,0,0.14101866881052652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,64,128,1,fp8,fp8,0,0.13593600193659464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.2104640007019043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.19926933447519937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,float16,0,0.11147200067838033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,float16,0,0.10896000266075134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.15105066696802774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.14800000190734863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,fp8,0,0.10969066619873047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,64,128,1,fp8,fp8,0,0.1069653332233429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.14969066778818765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.15019733707110086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.1425226628780365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.15027733643849692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,fp8,0,0.11107200384140015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,64,128,1,fp8,fp8,0,0.10332266489664714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.15034666657447815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.14180266857147217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,float16,0,0.10935999949773152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,fp8,0,0.11132267117500305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,64,128,1,fp8,fp8,0,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,64,128,1,fp8,fp8,0,0.10506133238474528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.1421280006567637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,float16,0,0.10903466741243999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.15043200055758157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,fp8,0,0.11097600062688191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.14993066589037576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,64,128,1,fp8,fp8,0,0.10525332887967427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.1418826679388682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,float16,0,0.10943999886512756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.15029866496721903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,fp8,0,0.10911466677983601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.1502400040626526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.14191466569900513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,float16,0,2.557744026184082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,64,128,1,fp8,fp8,0,2.3633227348327637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,fp8,0,2.579946676890055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,3.4109652837117515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,float16,0,2.5795413653055825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,3.1769545873006186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,3.428959846496582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,64,128,1,fp8,fp8,0,2.3862080574035645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,fp8,0,2.598618666330973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,3.43446413675944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,float16,0,2.5987680753072104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,3.458319981892904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,3.1963411966959634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,fp8,0,2.617834726969401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,3.4600906372070312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,64,128,1,fp8,fp8,0,2.4040212631225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,float16,0,2.620570659637451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,3.2143627802530923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,3.4773759841918945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,64,128,1,fp8,fp8,0,2.428592046101888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,fp8,0,2.637200037638346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,3.4883413314819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,3.50382391611735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,float16,0,1.4778186480204265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,fp8,0,1.5027146339416504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,3.248016039530436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,1.932400067647298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,64,128,1,fp8,fp8,0,1.4171786308288574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,1.835898717244466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,1.9532052675882976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,float16,0,1.285797357559204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,fp8,0,1.2967573006947835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,1.7151093482971191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,64,128,1,fp8,fp8,0,1.1913119951883953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,float16,0,1.2937119801839192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,1.7279946009318035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,1.6014827092488606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,fp8,0,1.305082639058431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,1.7235573132832844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,64,128,1,fp8,fp8,0,1.1995200316111247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,1.734613259633382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,1.6118613878885906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,float16,0,1.3016586303710938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,fp8,0,1.310096025466919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,64,128,1,fp8,fp8,0,1.2067200342814128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,1.7320000330607097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,1.6183786392211914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,1.7434132893880208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,float16,0,1.3107999960581462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,fp8,0,1.3248586654663086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,64,128,1,fp8,fp8,0,1.2198773225148518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,1.7475840250651042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,float16,0,0.7546186447143555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,1.6304052670796711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,1.7591573397318523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,0.9830933411916097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,fp8,0,0.7699946562449137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,64,128,1,fp8,fp8,0,0.72707732518514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,0.9987786610921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,float16,0,0.6585280100504557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,0.9389333724975586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,0.8788906733194987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,fp8,0,0.664629340171814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,64,128,1,fp8,fp8,0,0.6129386822382609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,0.8829653263092041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,float16,0,0.6619733174641927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,0.823578675587972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,64,128,1,fp8,fp8,0,0.6171146631240845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,0.8825493653615316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,fp8,0,0.6690346399943033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,0.8893173535664877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,0.8263946374257406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,float16,0,0.6653439998626709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,64,128,1,fp8,fp8,0,0.6213546593983968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,fp8,0,0.6721546649932861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,0.8853332996368408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,0.8931039969126383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,0.8314346472422282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,float16,0,0.6720693111419678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,fp8,0,0.6796320279439291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,0.8923786481221517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,64,128,1,fp8,fp8,0,0.6285599867502848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,float16,0,0.394597331682841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,0.8992906411488851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,0.5144693454106649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,0.8381653626759847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,fp8,0,0.4031519889831543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,64,128,1,fp8,fp8,0,0.38152531782786053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,0.5230773289998373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,0.459114670753479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,float16,0,0.3429226477940877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,0.49196799596150714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,fp8,0,0.34623467922210693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,64,128,1,fp8,fp8,0,0.3251519997914632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,0.4614186684290568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,0.4588213364283244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,0.43532800674438477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,float16,0,0.3461173375447591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,fp8,0,0.34834667046864826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,0.4365280071894328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,64,128,1,fp8,fp8,0,0.32754133145014447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,0.4614986578623454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,float16,0,0.3479199806849162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,64,128,1,fp8,fp8,0,0.33019200960795086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,0.4626773198445638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,fp8,0,0.3529493411382039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,0.4665173292160034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,0.43771199385325116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,float16,0,0.3509440024693807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,fp8,0,0.35512534777323407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,0.4709920088450114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,0.4662880102793376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,64,128,1,fp8,fp8,0,0.33236799637476605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,float16,0,0.21377599239349365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,0.2817759911219279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,0.27525333563486737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,0.44042666753133136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,fp8,0,0.21860800186793009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,64,128,1,fp8,fp8,0,0.20966933170954385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,0.26742400725682575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,float16,0,0.18474666277567545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.24449066321055093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,fp8,0,0.18529067436854044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,64,128,1,fp8,fp8,0,0.17988266547520956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.24636799097061157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.23666133483250937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.24742400646209717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.238154669602712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,float16,0,0.18491200606028238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.24424533049265543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,fp8,0,0.18722132841746011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,64,128,1,fp8,fp8,0,0.17893334229787192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,float16,0,0.18498667081197104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.2462986707687378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,fp8,0,0.18879467248916626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,64,128,1,fp8,fp8,0,0.18209600448608398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.2493386665980021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.23836266994476318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.2527359922726949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,float16,0,0.18897600968678793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.24860266844431558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,fp8,0,0.18995199600855509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,64,128,1,fp8,fp8,0,0.18491733074188232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.16153599818547568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.24237332741419473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,float16,0,0.1239306628704071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.1585706671079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,fp8,0,0.12600533167521158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,64,128,1,fp8,fp8,0,0.10321600238482158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,64,128,1,fp8,fp8,0,0.12422399719556172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.1567359964052836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.13491732875506082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,float16,0,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.1425493359565735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,fp8,0,0.1090773344039917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.14421332875887552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.13569600383440653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,float16,0,0.1086293359597524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.1429333289464315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,fp8,0,0.11121066411336263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,fp8,0,0.1099679966767629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,64,128,1,fp8,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.13609600067138672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.14389333128929138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,float16,0,0.10904533664385478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.14389866590499878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,64,128,1,fp8,fp8,0,0.10291199882825215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.14458666245142618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,float16,0,0.10937066872914632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.14501866698265076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,fp8,0,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,fp8,0,0.0844640036424001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,64,128,1,fp8,fp8,0,0.1037493348121643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.14520000418027243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.13710932930310568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,float16,0,0.08488532900810242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.10628799597422282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,64,128,1,fp8,fp8,0,0.08270399769147237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.10771200060844421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,64,128,1,fp8,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.10309867064158122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.10242666800816853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,float16,0,0.08477866649627686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.10593600074450175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,fp8,0,0.08452266454696655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.10852799812952678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,float16,0,0.08493333061536153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.10735467076301575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,fp8,0,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,64,128,1,fp8,fp8,0,0.08239466448624928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.10633599758148193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,64,128,1,fp8,fp8,0,0.08252800007661183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.10294399658838908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,float16,0,0.08475200335184734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.10738133390744527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,fp8,0,0.08479467034339905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.10816533366839091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.10218666990598042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,float16,0,0.08653333783149719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.10706133643786113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,fp8,0,0.08468266328175862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,64,128,1,fp8,fp8,0,0.08242133259773254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.107013334830602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.10319466392199199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,float16,0,3.063199996948242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,fp8,0,3.0566507975260415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,64,128,1,fp8,fp8,0,2.992080052693685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,3.607856114705404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,float16,0,3.0781386693318686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,3.606186548868815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,3.512159983317057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,3.6276801427205405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,fp8,0,3.0744107564290366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,64,128,1,fp8,fp8,0,3.104288101196289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,3.6197331746419272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,float16,0,3.0828320185343423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,3.641296068827311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,fp8,0,3.0684639612833657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,3.621631940205892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,64,128,1,fp8,fp8,0,3.1180105209350586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,3.604858716328939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,3.656277338663737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,float16,0,3.1241652170817056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,fp8,0,3.0904105504353843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,64,128,1,fp8,fp8,0,3.132517178853353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,3.680639902750651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,3.6878134409586587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,float16,0,1.6830453872680664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,3.665829340616862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,fp8,0,1.6583679517110188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,1.9829012552897136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,64,128,1,fp8,fp8,0,1.7035733858744304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,1.9603947003682454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,1.9768586158752441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,float16,0,1.5427680015563965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,1.8102025985717773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,fp8,0,1.538885275522868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,64,128,1,fp8,fp8,0,1.4834292729695637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,1.8129547437032063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,1.7582720120747883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,float16,0,1.54747740427653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,fp8,0,1.5476266543070476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,1.81769593556722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,64,128,1,fp8,fp8,0,1.486453374226888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,1.8186720212300618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,1.7500319480895996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,float16,0,1.5515413284301758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,fp8,0,1.5450986226399739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,1.8205386797587078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,64,128,1,fp8,fp8,0,1.514069398244222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,1.8177706400553386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,1.779743989308675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,float16,0,1.5516479810078938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,fp8,0,1.5502559343973796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,1.8291680018107097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,float16,0,0.8447679678599039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,64,128,1,fp8,fp8,0,1.4891146024068196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,0.996064027150472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,1.8247572580973308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,fp8,0,0.8292693297068278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,1.7575093905131023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,64,128,1,fp8,fp8,0,0.8395466804504395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,0.9792266686757406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,0.9784053166707357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,float16,0,0.7805120150248209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,0.9167093435923258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,fp8,0,0.7788373629252116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,64,128,1,fp8,fp8,0,0.7311466534932455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,0.9189279874165853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,0.8684906959533691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,float16,0,0.7832640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,0.9191839694976807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,fp8,0,0.7819039821624756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,64,128,1,fp8,fp8,0,0.7447306315104166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,0.9204213619232178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,0.8818720181783041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,float16,0,0.7813119888305664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,0.9198559919993082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,fp8,0,0.7812000115712484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,64,128,1,fp8,fp8,0,0.7456746896107992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,0.9192213217417399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,0.8804746468861898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,float16,0,0.7859466870625814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,0.9286719957987467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,fp8,0,0.7847359975179037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,64,128,1,fp8,fp8,0,0.7489866415659586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,float16,0,0.4328213135401408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,0.9227999846140543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,0.5093439817428589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,fp8,0,0.4254186550776164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,0.8843200206756592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,64,128,1,fp8,fp8,0,0.42851734161376953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,0.5004160006841024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,0.4972906510035197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,float16,0,0.3983893394470215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,fp8,0,0.39959998925526935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,0.46933333079020184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,64,128,1,fp8,fp8,0,0.3763893445332845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,0.469376007715861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,0.4447840054829915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,float16,0,0.40009601910909015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,0.47037335236867267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,fp8,0,0.39976000785827637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,64,128,1,fp8,fp8,0,0.38038400808970135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,0.4699573516845703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,0.4722026586532593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,0.4509600003560384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,float16,0,0.39910932381947833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,fp8,0,0.39946667353312176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,64,128,1,fp8,fp8,0,0.3797173500061035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,0.4721813201904297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,0.4488106568654378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,float16,0,0.40185598532358807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,0.4731146494547526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,fp8,0,0.4024266799290975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,64,128,1,fp8,fp8,0,0.38330666224161786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,0.4732266664505005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,float16,0,0.22481600443522134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,0.45366398493448895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,0.2656960090001424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,float16,0,0.20672533909479776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,fp8,0,0.22155199448267618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,64,128,1,fp8,fp8,0,0.22479466597239176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,0.2616320053736369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,0.2603573401769002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.24341332912445068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,fp8,0,0.20669333140055338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,64,128,1,fp8,fp8,0,0.1970026691754659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.24311999479929605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,64,128,1,fp8,fp8,0,0.1997119983037313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.23430933554967245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,float16,0,0.2082293430964152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.24310932556788126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,fp8,0,0.20585066080093384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.24433066447575888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.2360639969507853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,float16,0,0.20600533485412598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.24463999271392822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,fp8,0,0.20729066928227743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,64,128,1,fp8,fp8,0,0.1998293399810791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.2450773318608602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.23672000567118326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,float16,0,0.20770132541656494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.24751466512680054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,fp8,0,0.20865599314371744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,fp8,0,0.12179733316103618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,64,128,1,fp8,fp8,0,0.2029119928677877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.2455199956893921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.2381760080655416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,float16,0,0.12341866890589397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.1469439963499705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,fp8,0,0.11053333679835002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,64,128,1,fp8,fp8,0,0.12433600425720215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.1316266655921936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.1441386640071869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.1462506651878357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,float16,0,0.11137066284815471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.13150399923324585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,64,128,1,fp8,fp8,0,0.1039573351542155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.12517333030700684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,float16,0,0.11032000184059143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.13130133350690207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.13173333803812662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,fp8,0,0.11105599999427795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,64,128,1,fp8,fp8,0,0.10634666681289673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.130730668703715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.12637333075205484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,float16,0,0.11077866951624553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,fp8,0,0.1107360025246938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,64,128,1,fp8,fp8,0,0.1076746682325999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,64,128,1,fp8,fp8,0,0.10852799812952678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.13199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.12566933035850525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,float16,0,0.11171733339627583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.13150933384895325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,fp8,0,0.11149332920710246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.1325973371664683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.12993066509564719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,float16,0,0.06843199829260509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.08316799998283386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.08096000055472057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.07633066674073537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,fp8,0,0.06632000207901001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,64,128,1,fp8,fp8,0,0.07028799752394359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.07971199850241344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,float16,0,0.06392533580462138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,64,128,1,fp8,fp8,0,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,64,128,1,fp8,fp8,0,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.07229866584142049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,float16,0,0.06288533409436543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.07638933261235555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.0765066643555959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,float16,0,0.06422399977842967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.07654400169849396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,fp8,0,0.06333333253860474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,64,128,1,fp8,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.07694933315118153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.07366399963696797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,float16,0,0.0639466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,float16,0,0.04133866727352142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.07663466533025105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,fp8,0,0.06390400230884552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,64,128,1,fp8,fp8,0,0.06211733321348826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.050666665037473045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.07604800164699554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.07561600208282471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.049733335773150124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,64,128,1,fp8,fp8,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.04752000172932943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.04961599906285604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,64,128,1,fp8,fp8,0,0.039034667114416756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,64,128,1,fp8,fp8,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.04845866560935974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,float16,0,0.039274667700131737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.04864533245563507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.04980266590913137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,float16,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,float16,0,0.039274667700131737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.048714667558670044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,fp8,0,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,64,128,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.04786666731039683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.04747200012207031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,64,128,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.04966933528582255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,float16,0,2.963173230489095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,2.9851147333780923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,fp8,0,2.959312121073405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,64,128,1,fp8,fp8,0,2.88752015431722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,2.9719839096069336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,2.938624064127604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,float16,0,2.9690612157185874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,3.001903851826986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,fp8,0,2.969674746195475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,64,128,1,fp8,fp8,0,3.004432042439779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,2.9924853642781577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,3.046266555786133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,float16,0,2.9690879185994468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,2.9869171778361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,fp8,0,2.959482510884603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,64,128,1,fp8,fp8,0,3.010416030883789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,2.986037254333496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,3.07150936126709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,float16,0,3.0059839884440103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,3.0675414403279624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,fp8,0,2.9811840057373047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,64,128,1,fp8,fp8,0,3.038581212361654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,2.9985278447469077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,3.0654239654541016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,float16,0,1.6527946790059407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,1.6657439867655437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,fp8,0,1.6130132675170898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,64,128,1,fp8,fp8,0,1.6370506286621094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,1.6595733960469563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,float16,0,1.49018128712972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,1.6682507197062175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,1.4969493548075359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,fp8,0,1.4858187039693196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,64,128,1,fp8,fp8,0,1.4345280329386394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,1.49836270014445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,1.4423786799112956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,float16,0,1.4899039268493652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,1.5025280316670735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,fp8,0,1.4905600547790527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,64,128,1,fp8,fp8,0,1.440949281056722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,1.5014346440633137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,1.4678773880004883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,float16,0,1.49401060740153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,1.5013705889383953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,64,128,1,fp8,fp8,0,1.4730772972106934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,fp8,0,1.4893867174784343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,1.4990347226460774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,1.4782826105753581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,float16,0,1.4979573885599773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,1.5120800336201985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,fp8,0,1.4943466186523438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,64,128,1,fp8,fp8,0,1.452720006306966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,float16,0,0.8198560078938802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,1.5080533027648926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,1.4600319862365723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,0.8348053296407064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,fp8,0,0.8036533196767172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,0.8166666825612386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,64,128,1,fp8,fp8,0,0.8184853394826254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,float16,0,0.7526079813639323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,0.8269973595937093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,0.7577333450317383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,fp8,0,0.7522133191426595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,64,128,1,fp8,fp8,0,0.7099626859029134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,0.7573653062184652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,0.7200906276702881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,float16,0,0.7545173168182373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,64,128,1,fp8,fp8,0,0.7213813463846842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,0.759722630182902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,fp8,0,0.751909335454305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,0.7594453493754069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,0.7321973641713461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,float16,0,0.7529119650522867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,0.7597280343373617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,fp8,0,0.7534560362497965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,64,128,1,fp8,fp8,0,0.7224960327148438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,0.7578506469726562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,0.7299839655558268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,float16,0,0.7570026715596517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,0.7628160317738851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,fp8,0,0.7553706963857015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,64,128,1,fp8,fp8,0,0.7213919957478842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,0.7627200285593668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,float16,0,0.4182026783625285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,0.7338399887084961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,0.42512532075246173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,float16,0,0.38464534282684326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,fp8,0,0.4102773269017537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,64,128,1,fp8,fp8,0,0.41652266184488934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,fp8,0,0.385045329729716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,64,128,1,fp8,fp8,0,0.3649226824442546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,0.4168533484141032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,0.42193599541982013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,0.3880586624145508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,0.3892373243967692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,0.36956799030303955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,float16,0,0.3853439887364705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,0.38840532302856445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,fp8,0,0.38580799102783203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,64,128,1,fp8,fp8,0,0.3681120077768962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,0.38736534118652344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,0.3741493225097656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,float16,0,0.3860960006713867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,0.3879893223444621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,fp8,0,0.3861813147862752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,64,128,1,fp8,fp8,0,0.3680479923884074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,fp8,0,0.38812800248463947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,0.38922667503356934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,0.39160001277923584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,float16,0,0.2183306614557902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,0.37383464972178143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,float16,0,0.38684264818827313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,fp8,0,0.21512534221013388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,0.39183998107910156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,64,128,1,fp8,fp8,0,0.36937065919240314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,0.375765323638916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,0.22142932812372842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,64,128,1,fp8,fp8,0,0.21825599670410156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,0.21763734022776285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,0.22164799769719443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,float16,0,0.19960000117619833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.20119466384251913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,fp8,0,0.2009119987487793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,fp8,0,0.19984533389409384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,64,128,1,fp8,fp8,0,0.1918613314628601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.20006400346755981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.19398399194081625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,float16,0,0.2004800041516622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.20226667324701944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,64,128,1,fp8,fp8,0,0.19262399276097616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,fp8,0,0.2009226679801941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.20109866062800089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.2015733321507772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.19502399365107217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.19530133406321207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.20222399632136026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,float16,0,0.20058133204778036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.20154666900634766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.20292800664901733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,64,128,1,fp8,fp8,0,0.19334399700164795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,float16,0,0.20283732811609903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,fp8,0,0.20218666394551596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,64,128,1,fp8,fp8,0,0.194757342338562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.19757866859436035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,float16,0,0.12018666664759318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.1228000024954478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,fp8,0,0.11749866604804993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,64,128,1,fp8,fp8,0,0.12147200107574463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.1199733316898346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,64,128,1,fp8,fp8,0,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.12462932864824931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,float16,0,0.10728533069292705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.1093280017375946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,fp8,0,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,fp8,0,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.10314666231473286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,float16,0,0.10697066783905029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.10970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,64,128,1,fp8,fp8,0,0.10302933057149251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.10938133796056111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.10521066188812256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,float16,0,0.10719466209411621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.1092693308989207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,float16,0,0.10966400305430095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,fp8,0,0.10904000202814738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,64,128,1,fp8,fp8,0,0.10390399893124898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.10868266224861145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.10493866602579753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.10922132929166158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,fp8,0,0.10874666770299275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,64,128,1,fp8,fp8,0,0.10622400045394897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.1093280017375946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,64,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.1076639990011851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,float16,0,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.06671999891599019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,fp8,0,0.06523733337720235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,64,128,1,fp8,fp8,0,0.05850133299827576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.06577600042025249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.07018666466077168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,float16,0,0.06123200058937073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.06211199859778086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,fp8,0,0.06188266475995382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.06299200157324474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,float16,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.06228266656398773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,fp8,0,0.06162666777769724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,64,128,1,fp8,fp8,0,0.05975999931494395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,64,128,1,fp8,fp8,0,0.05977066854635874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.06294399996598561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.06302933394908905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,float16,0,0.06234666705131531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.06303466856479645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,float16,0,0.06223999957243601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,fp8,0,0.06260799864927928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,64,128,1,fp8,fp8,0,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.06354133288065593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.04234666625658671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.06164266665776571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,float16,0,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,64,128,1,fp8,fp8,0,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,float16,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,fp8,0,0.0393653338154157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,64,128,1,fp8,fp8,0,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,64,128,1,fp8,fp8,0,0.03885866701602936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,float16,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,64,128,1,fp8,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,float16,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,64,128,1,fp8,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.041093334555625916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,float16,0,0.02647999922434489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,64,128,1,fp8,fp8,0,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,64,128,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,float16,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,64,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,float16,0,1.3807679812113445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,1.349605401357015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,fp8,0,1.3760746320088704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,64,128,1,fp8,fp8,0,1.3128320376078289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,1.3466347058614094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,1.2897013028462727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,float16,0,1.3754186630249023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,1.3443039258321126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,fp8,0,1.3707946141560872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,64,128,1,fp8,fp8,0,1.342949390411377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,1.343616008758545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,1.3356693585713704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,float16,0,1.3790772755940754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,1.3496534029642742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,fp8,0,1.3740533192952473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,64,128,1,fp8,fp8,0,1.3589332898457844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,1.3441920280456543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,1.332154671351115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,float16,0,1.3832160631815593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,1.3549013137817383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,fp8,0,1.3791306813557942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,64,128,1,fp8,fp8,0,1.373626708984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,1.3493067423502605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,float16,0,0.7635893026987711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,0.7498453458150228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,fp8,0,0.7451039950052897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,1.3500266075134277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,64,128,1,fp8,fp8,0,0.7641599973042806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,0.7326986789703369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,float16,0,0.6971200307210287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,0.7514879703521729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,0.6810666720072428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,fp8,0,0.6957866350809733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,64,128,1,fp8,fp8,0,0.6533706585566202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,0.6814133326212565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,0.6793013413747152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,0.6407039960225424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,float16,0,0.6953439712524414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,64,128,1,fp8,fp8,0,0.6653439998626709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,fp8,0,0.6929919719696045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,0.677786668141683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,0.651258667310079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,float16,0,0.696341355641683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,0.6805333296457926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,fp8,0,0.6929120222727457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,64,128,1,fp8,fp8,0,0.6675573190053304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,0.6786399682362875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,0.6551786661148071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,float16,0,0.6974720160166422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,0.6854613622029623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,fp8,0,0.6966133117675781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,64,128,1,fp8,fp8,0,0.6676639715830485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,0.6814560095469157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,64,128,1,fp8,fp8,0,0.38943998018900555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,float16,0,0.38705599308013916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,0.3739253282546997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,0.6564000050226847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,0.38256533940633136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,fp8,0,0.3805600007375081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,0.3827679951985677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,float16,0,0.3545920054117839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.3472906748453776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,fp8,0,0.35309334595998126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,float16,0,0.3535360097885132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,64,128,1,fp8,fp8,0,0.335807998975118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.3479679822921753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.3282666603724162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.34700266520182294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,fp8,0,0.3529866536458333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,64,128,1,fp8,fp8,0,0.34041066964467365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.3442666530609131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.3325493335723877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,float16,0,0.35476799805959064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,0.34725332260131836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,fp8,0,0.3532693386077881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,64,128,1,fp8,fp8,0,0.3407573302586873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,0.3466879924138387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.3330506682395935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,float16,0,0.3568586508433024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,0.3497653404871623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,fp8,0,0.35599998633066815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,64,128,1,fp8,fp8,0,0.3404586712519328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,0.34878400961558026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,0.3357173204421997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,float16,0,0.20153599977493286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,0.19801066319147745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,fp8,0,0.197818656762441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,64,128,1,fp8,fp8,0,0.20344533522923788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,0.19411200284957886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.20122667153676352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,float16,0,0.18315200010935465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.1792853275934855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,fp8,0,0.18306666612625122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,64,128,1,fp8,fp8,0,0.17493865887324014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.17857599258422852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.17080533504486084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,float16,0,0.18268267313639322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,fp8,0,0.1829866568247477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.17802667617797852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,64,128,1,fp8,fp8,0,0.17599999904632568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.1786293387413025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.17186667521794638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,float16,0,0.1834026575088501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.17870400349299112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,float16,0,0.18317333857218424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,fp8,0,0.18106667200724283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,64,128,1,fp8,fp8,0,0.17735999822616577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.17880533138910928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.17494932810465494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.17275200287501016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.17901867628097534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,fp8,0,0.18475733200709024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,64,128,1,fp8,fp8,0,0.17900800704956055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.1785973310470581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,float16,0,0.1102239986260732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,fp8,0,0.10935999949773152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,64,128,1,fp8,fp8,0,0.09469866752624512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,64,128,1,fp8,fp8,0,0.11303466558456421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.11128000418345134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,float16,0,0.09742933511734009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.09493866562843323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,fp8,0,0.09749333063761394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,fp8,0,0.09776533643404643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.09616532921791077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.09058133761088054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,float16,0,0.09713066617647807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.09503466884295146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,64,128,1,fp8,fp8,0,0.09444800019264221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.0957493285338084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.09641066193580627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.09276800354321797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,float16,0,0.09915199875831604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.09657067060470581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,fp8,0,0.09798399607340495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,64,128,1,fp8,fp8,0,0.0946613351504008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.09233066439628601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,float16,0,0.09880000352859497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,fp8,0,0.0997759997844696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,64,128,1,fp8,fp8,0,0.0959999958674113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.09705066680908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.09481066465377808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,float16,0,0.06078400214513143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.0609386662642161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,fp8,0,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,64,128,1,fp8,fp8,0,0.06467733283837636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.059749335050582886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.06357866525650024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,float16,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,64,128,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.056741332014401756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.05518400172392527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,float16,0,0.05705066521962484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.05729066828886668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,fp8,0,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,64,128,1,fp8,fp8,0,0.055717334151268005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.056746666630109154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,float16,0,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.05675200124581655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,fp8,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,64,128,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.056746666630109154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.05451733370621999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,64,128,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.05600533386071523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.054144000013669334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,float16,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.05608533322811127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,float16,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,64,128,1,fp8,fp8,0,0.03692800054947535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.036544000109036766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.03602666656176249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,float16,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,fp8,0,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,64,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,float16,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,64,128,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.03488533447186152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,float16,0,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,float16,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.035445332527160645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,64,128,1,fp8,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,64,128,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.0348693331082662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,64,128,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,float16,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,64,128,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,float16,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,float16,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,64,128,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,64,128,1,fp8,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.02073066681623459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,64,128,1,fp8,fp8,0,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,fp8,0,0.02042666698495547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,float16,0,0.01988799994190534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,64,128,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,64,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,float16,0,0.7351253032684326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,0.7337546348571777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,fp8,0,0.7305386861165365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,64,128,1,fp8,fp8,0,0.7016106446584066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,0.730725367863973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,0.7026027043660482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,float16,0,0.7329546610514323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,0.7324159940083822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,fp8,0,0.7319626808166504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,64,128,1,fp8,fp8,0,0.7143946488698324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,0.7302506764729818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,0.7126346429189047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,float16,0,0.736255963643392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,0.7344533602396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,fp8,0,0.7331519921620687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,64,128,1,fp8,fp8,0,0.715008020401001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,0.7315306663513184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,0.7152693271636963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,float16,0,0.739296038945516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,0.7399466832478842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,fp8,0,0.7349119981129965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,64,128,1,fp8,fp8,0,0.7121866544087728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,float16,0,0.40804799397786456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,0.7382453282674154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,0.4086986780166626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,0.7120373249053955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,fp8,0,0.40114132563273114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,64,128,1,fp8,fp8,0,0.4124639828999837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,0.4000213146209717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,0.4124533335367839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,fp8,0,0.3737706740697225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,float16,0,0.3742239872614543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.3747413158416748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,64,128,1,fp8,fp8,0,0.35647467772165936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.37282665570576984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.35573331514994305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.37174399693806964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.37145066261291504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,float16,0,0.3725279966990153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,fp8,0,0.3718453248341878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,64,128,1,fp8,fp8,0,0.3602293332417806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.3619413375854492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,float16,0,0.3738986651102702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.3731626669565837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,0.37273065249125165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,fp8,0,0.37270931402842206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,64,128,1,fp8,fp8,0,0.36289600531260174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.36474132537841797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,float16,0,0.3767626682917277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,0.3766453266143799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,fp8,0,0.37563733259836835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,64,128,1,fp8,fp8,0,0.3632320165634155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,0.375055988629659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.3638453483581543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,float16,0,0.21202667554219565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.21357333660125732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,0.21184533834457397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,fp8,0,0.20758400360743204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,64,128,1,fp8,fp8,0,0.21397332350413004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,0.2076693375905355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.19215999046961466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,float16,0,0.193231999874115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.193338672320048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.19287467002868652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,fp8,0,0.19164800643920898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,fp8,0,0.19395732879638672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,64,128,1,fp8,fp8,0,0.18509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.1848106582959493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,float16,0,0.1928000052769979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,64,128,1,fp8,fp8,0,0.18689600626627603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,fp8,0,0.19178134202957153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.1933013399442037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.18580265839894614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.18920000394185385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,float16,0,0.19219734271367392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.19386667013168335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,64,128,1,fp8,fp8,0,0.18850666284561157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.19318934281667074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,float16,0,0.1953279972076416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.19411200284957886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,fp8,0,0.1933280030886332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,64,128,1,fp8,fp8,0,0.18916267156600952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.19379733006159464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.11311466495196025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.19025067488352457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,float16,0,0.11512533823649089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.11550399661064148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,fp8,0,0.11390933394432068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,64,128,1,fp8,fp8,0,0.09784533580144246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,64,128,1,fp8,fp8,0,0.1172106663386027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.11780266960461934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,float16,0,0.10259733597437541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,float16,0,0.1030399998029073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.10225066542625427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,fp8,0,0.10211199522018433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.10337066650390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.10222400228182475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.09890666604042053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.10178666313489278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,fp8,0,0.10275733470916748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,64,128,1,fp8,fp8,0,0.09973866740862529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.09907199939092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,float16,0,0.10291199882825215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.10406933228174846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,fp8,0,0.10286399722099304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,64,128,1,fp8,fp8,0,0.10040000081062317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,64,128,1,fp8,fp8,0,0.10165866216023763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.10258133212725322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.09943999846776326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,float16,0,0.10525866349538167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.10496532917022705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,fp8,0,0.10319466392199199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,fp8,0,0.06187200049559275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.10353599985440572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.10078932841618855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,float16,0,0.06195199986298879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.062037333846092224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,64,128,1,fp8,fp8,0,0.06552533308664958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.06499200065930684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,float16,0,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,float16,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,64,128,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,64,128,1,fp8,fp8,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,fp8,0,0.058880001306533813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.05754133562246958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.0572213331858317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,float16,0,0.0584746648867925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,float16,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.0591786652803421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.058506667613983154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,64,128,1,fp8,fp8,0,0.05683733522891998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.05741333464781443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,fp8,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,64,128,1,fp8,fp8,0,0.057674666245778404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,fp8,0,0.03857066730658213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.057855998476346336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.05677333474159241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,float16,0,0.038373333712418876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,64,128,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.03817066550254822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,float16,0,0.036015999813874565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,64,128,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,64,128,1,fp8,fp8,0,0.03610666592915853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.035887998839219414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,fp8,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,fp8,0,0.036373332142829895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,float16,0,0.037130666275819145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.037418665985266365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,64,128,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.037061333656311035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,64,128,1,fp8,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.036346666514873505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,64,128,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,float16,0,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,float16,0,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,float16,0,0.026842666169007618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,fp8,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,64,128,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,float16,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.024341332415739696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,64,128,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,64,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,64,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,float16,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,64,128,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,float16,0,0.5306719938913981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.5317493279774984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,fp8,0,0.5290826559066772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,64,128,1,fp8,fp8,0,0.49608532587687176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.5292426745096842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,0.4943733215332031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,float16,0,0.5285919904708862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.5289119879404703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,fp8,0,0.529151995976766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,64,128,1,fp8,fp8,0,0.4982986847559611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.5280533234278361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,0.4997493426005046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,fp8,0,0.529093345006307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,64,128,1,fp8,fp8,0,0.4997546672821045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,float16,0,0.5295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,0.529146671295166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.5298399925231934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,float16,0,0.5323733488718668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,0.5003840128580729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,0.5324426492055258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,fp8,0,0.5311946471532186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,64,128,1,fp8,fp8,0,0.49928534030914307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,0.5291786591211954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,float16,0,0.2889546751976013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,0.5017813444137573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,0.28755732377370197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,fp8,0,0.2857919931411743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,64,128,1,fp8,fp8,0,0.28408533334732056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,0.2861280043919881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.28365333875020343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,float16,0,0.2715839942296346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.2711519996325175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,fp8,0,0.2701333363850911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,64,128,1,fp8,fp8,0,0.25411732991536456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.2712533275286357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.25562665859858197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,float16,0,0.2711306611696879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.27109332879384357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,float16,0,0.27086400985717773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,fp8,0,0.271232008934021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,64,128,1,fp8,fp8,0,0.2561226685841878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.26979732513427734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.2544106642405192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.2707200050354004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,fp8,0,0.27078932523727417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,64,128,1,fp8,fp8,0,0.25675199429194134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.2717546621958415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.2581706643104553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,float16,0,0.27261332670847577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.2709119915962219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.27158933877944946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,fp8,0,0.2711306611696879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,64,128,1,fp8,fp8,0,0.26027733087539673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.2595360080401103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,float16,0,0.15373333295186362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.153029332558314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,fp8,0,0.15051199992497763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,64,128,1,fp8,fp8,0,0.15217066804567972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.15176533659299216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.15261333187421164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,float16,0,0.14180266857147217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.14351999759674072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,fp8,0,0.1434826652208964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,64,128,1,fp8,fp8,0,0.1340000033378601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.14258666833241782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.13357333342234293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,float16,0,0.14315733313560486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.13506666819254556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.14289599657058716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,fp8,0,0.14214932918548584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,64,128,1,fp8,fp8,0,0.13478400309880575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.14243200421333313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,float16,0,0.14353066682815552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.1430506706237793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,fp8,0,0.1422826647758484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,64,128,1,fp8,fp8,0,0.13563733299573263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.14205867052078247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.13596266508102417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,float16,0,0.1441439986228943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.14412800470987955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,fp8,0,0.14411733547846475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,fp8,0,0.08180800080299377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,64,128,1,fp8,fp8,0,0.13593066732088724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.1442506710688273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.13636266191800436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,float16,0,0.0803466687599818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.0822026679913203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,64,128,1,fp8,fp8,0,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.08039466540018718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.08339732885360718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,float16,0,0.07644799848397572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,float16,0,0.07657599945863088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.07687999804814656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,fp8,0,0.07824533184369405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,64,128,1,fp8,fp8,0,0.07281066477298737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.076773335536321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.07423466444015503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,fp8,0,0.07659733295440674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,fp8,0,0.07674666742483775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,64,128,1,fp8,fp8,0,0.07334933181603749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.07243200143178304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,float16,0,0.0782773345708847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.0779306689898173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,64,128,1,fp8,fp8,0,0.0729973316192627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.07623999814192454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.07838400204976399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.07428800066312154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.07468800246715546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,float16,0,0.07642666498819987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.07736533383528392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.04577599962552389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,64,128,1,fp8,fp8,0,0.07383466760317485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,float16,0,0.04651199777921041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,64,128,1,fp8,fp8,0,0.04673600196838379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,float16,0,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,64,128,1,fp8,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,float16,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,fp8,0,0.04549333453178406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,64,128,1,fp8,fp8,0,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,fp8,0,0.04563199977080027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,64,128,1,fp8,fp8,0,0.04474133253097534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,64,128,1,fp8,fp8,0,0.0435146689414978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.04532266656557719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,float16,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,fp8,0,0.04454400142033895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,float16,0,0.02976000060637792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,64,128,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,float16,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,64,128,1,fp8,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.030042665700117748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,64,128,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,float16,0,0.030192000170548756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,fp8,0,0.030640001098314922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,64,128,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,float16,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,fp8,0,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,64,128,1,fp8,fp8,0,0.030053332448005676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,64,128,1,fp8,fp8,0,0.020442667106787365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,float16,0,0.020319999506076176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,64,128,1,fp8,fp8,0,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,float16,0,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,64,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,float16,0,0.42869865894317627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,64,128,1,fp8,fp8,0,0.39507734775543213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.42638933658599854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,fp8,0,0.42663999398549396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.42736534277598065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.3954506715138753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,float16,0,0.42709867159525555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.42766932646433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,fp8,0,0.42565866311391193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.42643733819325763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,64,128,1,fp8,fp8,0,0.39424534638722736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,float16,0,0.42711468537648517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.39528000354766846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.4267359972000122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,fp8,0,0.4264640013376872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,64,128,1,fp8,fp8,0,0.396666685740153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.4259413480758667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.396997332572937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,float16,0,0.4291946490605672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,fp8,0,0.4279466470082601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.4273600180943807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,64,128,1,fp8,fp8,0,0.39874664942423504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.4270026683807373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,float16,0,0.23009600241978964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.3981013298034668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.23027199506759644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,fp8,0,0.22986133893330893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,64,128,1,fp8,fp8,0,0.22380799055099487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.2290560007095337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,64,128,1,fp8,fp8,0,0.20387732982635498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.22343466679255167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,float16,0,0.22196267048517862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.22073066234588623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,fp8,0,0.22005333503087363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.22165334224700928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.20532800753911337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,float16,0,0.2216800053914388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.22202134132385254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,float16,0,0.22208533684412637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,fp8,0,0.2205866575241089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,64,128,1,fp8,fp8,0,0.20567466815312704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,fp8,0,0.22170666853586832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.22100800275802612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.20417600870132446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.2222986618677775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,64,128,1,fp8,fp8,0,0.20566932360331217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.22101867198944092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.2054133415222168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,float16,0,0.22367999951044717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.22333866357803345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,fp8,0,0.22177066405614218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,64,128,1,fp8,fp8,0,0.20660267273585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.22127999862035116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.11948266625404358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.20701332887013754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,float16,0,0.11939199765523274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.12056000034014384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,fp8,0,0.1206826666990916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,64,128,1,fp8,fp8,0,0.11870400110880534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.11593600114186604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.1176533301671346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,float16,0,0.11553600430488586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.1172320048014323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,fp8,0,0.11557867129643758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,64,128,1,fp8,fp8,0,0.108815997838974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.10897066195805867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,float16,0,0.11655466755231221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,fp8,0,0.11740799744923909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,64,128,1,fp8,fp8,0,0.10942400495211284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.11723732948303223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.10915199915568034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.10994133353233337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,float16,0,0.11597333351771037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.11533332864443462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,fp8,0,0.11652266979217529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,fp8,0,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,64,128,1,fp8,fp8,0,0.10934399565060933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.11548800269762675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,float16,0,0.11762133240699768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.11529599626859029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,64,128,1,fp8,fp8,0,0.11045866211255391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.11553066968917847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.10941867033640544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,float16,0,0.065610667069753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.06625066697597504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,float16,0,0.06426133215427399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,fp8,0,0.06599466502666473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,64,128,1,fp8,fp8,0,0.06322133541107178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.06393600006898244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,float16,0,0.06605866551399231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.06571199993292491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,fp8,0,0.06572799881299336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,64,128,1,fp8,fp8,0,0.06202666461467743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.06424533327420552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.06192000210285187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,float16,0,0.06538666784763336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,64,128,1,fp8,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.06558933357397716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.0651146670182546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.06189866860707601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,fp8,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,64,128,1,fp8,fp8,0,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,float16,0,0.06571199993292491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.06473599870999654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,64,128,1,fp8,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.06400533517201741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,float16,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,fp8,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.03908800085385641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,fp8,0,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,64,128,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,float16,0,0.03843733419974645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,64,128,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.03844800094763438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,float16,0,0.03800000001986822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,64,128,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,float16,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,fp8,0,0.038506666819254555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,64,128,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.03878399978081385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,64,128,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,64,128,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,64,128,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,fp8,0,0.026026666164398193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,64,128,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,64,128,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,64,128,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,64,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,float16,0,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,float16,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,64,128,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,64,128,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,float16,0,0.3761333227157593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.37540801366170246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,fp8,0,0.37719468275705975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,64,128,1,fp8,fp8,0,0.34786665439605713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.37540801366170246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.37442131837209064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.3474186658859253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,float16,0,0.3755679925282796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,fp8,0,0.3755893309911092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,float16,0,0.3755306800206502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,64,128,1,fp8,fp8,0,0.34681065877278644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.37728532155354816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.34836800893147785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.3755199909210205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.34750934441884357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,fp8,0,0.37626131375630695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,64,128,1,fp8,fp8,0,0.3475253184636434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.37594131628672284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,float16,0,0.37642133235931396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.37699198722839355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,fp8,0,0.37652266025543213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,64,128,1,fp8,fp8,0,0.34938132762908936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.37625598907470703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.3492746750513713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,float16,0,0.19706666469573975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.1969119906425476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.19772799809773764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,fp8,0,0.19716266791025797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,64,128,1,fp8,fp8,0,0.1893493334452311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.19060266017913818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,float16,0,0.19308799505233765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.19308799505233765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.1814026633898417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.19340266784032187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,fp8,0,0.1934880018234253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,64,128,1,fp8,fp8,0,0.18097599347432455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,float16,0,0.19322667519251505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.19343467553456625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,fp8,0,0.19298666715621948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,64,128,1,fp8,fp8,0,0.18117332458496094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.19298134247461954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.18012267351150513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,float16,0,0.19310933351516724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.19340799252192178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,float16,0,0.19300266106923422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.19298666715621948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,fp8,0,0.1930560072263082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,64,128,1,fp8,fp8,0,0.18077333768208823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.19403199354807535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,float16,0,0.10356799761454265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.18039999405543009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,fp8,0,0.19299199183781943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,64,128,1,fp8,fp8,0,0.18080532550811768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.10381866494814555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.19347200791041055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.18039999405543009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.1030506690343221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.10505066315333049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,fp8,0,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,64,128,1,fp8,fp8,0,0.09898666540781657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.09885332981745402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,float16,0,0.10302933057149251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,fp8,0,0.10285333792368571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,64,128,1,fp8,fp8,0,0.09690666198730469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.10302933057149251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.09719467163085938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,float16,0,0.1032960017522176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,fp8,0,0.10302933057149251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,64,128,1,fp8,fp8,0,0.09675733248392741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.10309867064158122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.09796800216039021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,float16,0,0.10295466581980388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,float16,0,0.1032426655292511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,fp8,0,0.1029919981956482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,64,128,1,fp8,fp8,0,0.09714133540789287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.10287466645240784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.09883733590443929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.10363733768463135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,fp8,0,0.10337066650390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,64,128,1,fp8,fp8,0,0.09853333234786987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,64,128,1,fp8,fp8,0,0.05635199944178263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.056789333621660866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.0971999963124593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,float16,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,fp8,0,0.059232001503308616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,float16,0,0.05779733260472616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,fp8,0,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,64,128,1,fp8,fp8,0,0.054858664671579994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,64,128,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.058287998040517174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,float16,0,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.057962665955225624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,fp8,0,0.05779199798901876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,float16,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,fp8,0,0.05825600028038025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,64,128,1,fp8,fp8,0,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,64,128,1,fp8,fp8,0,0.055674667159716286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,float16,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.05778666834036509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,fp8,0,0.05850133299827576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.05825600028038025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,64,128,1,fp8,fp8,0,0.03365333378314972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,64,128,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,float16,0,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,float16,0,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.03506666670242945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,fp8,0,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,64,128,1,fp8,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,float16,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,float16,0,0.03488533447186152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,64,128,1,fp8,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.03448000053564707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,float16,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,fp8,0,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,64,128,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.03505599995454153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,64,128,1,fp8,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,64,128,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,float16,0,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,64,128,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,64,128,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,64,128,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,float16,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,64,128,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,64,128,1,fp8,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,fp8,0,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,64,128,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,float16,0,2.872842788696289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,64,128,1,fp8,fp8,0,2.6133813858032227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,fp8,0,2.8890771865844727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,float16,0,2.8906561533610025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,fp8,0,2.913397471110026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,64,128,1,fp8,fp8,0,2.6362293561299643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,float16,0,2.9289493560791016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,fp8,0,2.951807975769043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,15.816805521647135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,64,128,1,fp8,fp8,0,2.681269327799479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,17.179898579915363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,17.178442637125652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,17.21338144938151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,float16,0,1.6665387153625488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,15.852357228597006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,fp8,0,1.7025814056396484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,17.269034067789715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,64,128,1,fp8,fp8,0,1.5762720108032227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,float16,0,1.484341303507487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,17.27847417195638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,fp8,0,1.4982293446858723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,64,128,1,fp8,fp8,0,1.3552427291870117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,8.936330795288086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,float16,0,1.4931200345357258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,8.25992520650228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,8.976922353108725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,8.698853174845377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,fp8,0,1.5081493059794109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,64,128,1,fp8,fp8,0,1.3629706700642903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,15.912410736083984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,float16,0,1.508618672688802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,17.26676305135091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,8.699973424275717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,8.022709528605143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,fp8,0,1.524661382039388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,64,128,1,fp8,fp8,0,1.3836266199747722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,float16,0,0.909829298655192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,8.695669174194336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,fp8,0,0.9319360256195068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,8.725189208984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,64,128,1,fp8,fp8,0,0.8703733285268148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,8.040543874104818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,float16,0,0.8255893389383951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,4.600319862365723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,8.737781524658203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,fp8,0,0.8324320316314697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,64,128,1,fp8,fp8,0,0.7602826754252116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,8.053770701090494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,8.74614397684733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,4.626181284586589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,4.263146718343099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,float16,0,0.8297333717346191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,4.493749300638835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,fp8,0,0.8359893163045248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,64,128,1,fp8,fp8,0,0.7650400002797445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,float16,0,0.835962692896525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,4.492282549540202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,4.145338694254558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,fp8,0,0.8443520069122314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,4.497008005777995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,64,128,1,fp8,fp8,0,0.7756693363189697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,4.50549856821696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,float16,0,0.5963466564814249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,4.157909393310547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,fp8,0,0.5946986675262451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,4.506143887837728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,64,128,1,fp8,fp8,0,0.5555093288421631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,2.5076640446980796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,float16,0,0.5963253180185953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,4.511167844136556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,fp8,0,0.5968426863352457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,64,128,1,fp8,fp8,0,0.5543306668599447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,4.157941182454427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,2.511103947957357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,2.3088372548421225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,float16,0,0.5967359940210978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,fp8,0,0.5970613161722819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,64,128,1,fp8,fp8,0,0.5565653244654337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,2.4900639851888022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,2.493845303853353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,float16,0,0.5970026652018229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,2.3060107231140137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,fp8,0,0.5975306828816732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,2.4918346405029297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,64,128,1,fp8,fp8,0,0.5532426834106445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,2.493834654490153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,2.302074591318766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,2.5008959770202637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,2.304288069407145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,2.4911306699117026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,float16,0,2.1257972717285156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,fp8,0,2.1428960164388022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,64,128,1,fp8,fp8,0,1.93505064646403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,float16,0,2.140175978342692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,fp8,0,2.1582132975260415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,64,128,1,fp8,fp8,0,1.9491839408874512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,float16,0,2.164074738820394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,10.112293243408203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,fp8,0,2.1841440200805664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,9.341093063354492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,10.1134401957194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,64,128,1,fp8,fp8,0,1.9822187423706055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,10.126245498657227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,float16,0,1.2490399678548176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,10.158613204956055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,9.341594696044922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,fp8,0,1.2760106722513835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,64,128,1,fp8,fp8,0,1.1804426511128743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,10.173306783040365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,float16,0,1.1163893540700276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,fp8,0,1.1264533201853435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,64,128,1,fp8,fp8,0,1.0201173623402913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,5.332810719807942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,5.362447738647461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,4.9205013910929365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,float16,0,1.1239253679911296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,9.39180819193522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,5.143194516499837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,10.201520284016928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,fp8,0,1.1324533621470134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,64,128,1,fp8,fp8,0,1.0275946458180745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,float16,0,1.135434627532959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,4.752810796101888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,5.162783940633138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,fp8,0,1.147338628768921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,64,128,1,fp8,fp8,0,1.0419306755065918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,float16,0,0.686848004659017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,5.167632102966309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,fp8,0,0.7027786572774252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,4.758384068806966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,64,128,1,fp8,fp8,0,0.6582773526509603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,5.179007848103841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,5.17739741007487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,float16,0,0.6254133383433024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,2.7730080286661782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,fp8,0,0.6311573187510172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,4.776991844177246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,64,128,1,fp8,fp8,0,0.5775733391443888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,5.191887855529785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,2.564858595530192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,2.78763739267985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,2.688229242960612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,float16,0,0.6277493238449097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,fp8,0,0.6315306822458903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,64,128,1,fp8,fp8,0,0.5802239974339803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,2.6897811889648438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,float16,0,0.6332266728083292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,2.4809120496114097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,fp8,0,0.6396640141805013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,64,128,1,fp8,fp8,0,0.5866880019505819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,2.694202740987142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,2.6945279439290366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,float16,0,0.45132267475128174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,2.492389361063639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,fp8,0,0.45368532339731854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,2.6963841120402017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,64,128,1,fp8,fp8,0,0.4206826686859131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,1.5406667391459148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,float16,0,0.4523306687672933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,2.7049547831217446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,2.492581367492676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,1.4194293022155762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,1.5439626375834148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,fp8,0,0.4532426595687866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,64,128,1,fp8,fp8,0,0.4196693499883016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,1.5263840357462566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,float16,0,0.4533546765645345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,fp8,0,0.45258132616678876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,64,128,1,fp8,fp8,0,0.4225120147069295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,1.5335520108540852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,1.416975975036621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,1.5351786613464355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,float16,0,0.4519519805908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,fp8,0,0.45290664831797284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,1.529391924540202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,1.4204853375752766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,64,128,1,fp8,fp8,0,0.4223946730295817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,1.5324427286783855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,1.538863976796468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,1.4230079650878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,float16,0,1.7668107350667317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,64,128,1,fp8,fp8,0,1.6039573351542156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,fp8,0,1.782101313273112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,float16,0,1.7773760159810383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,fp8,0,1.7910186449686687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,64,128,1,fp8,fp8,0,1.6181707382202148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,float16,0,1.795786698659261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,7.275472005208333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,7.2901865641276045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,6.716458638509114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,fp8,0,1.814234733581543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,7.299322764078776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,64,128,1,fp8,fp8,0,1.6415626207987468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,7.319269180297852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,float16,0,1.0409706433614094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,6.728037516276042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,fp8,0,1.0646080176035564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,64,128,1,fp8,fp8,0,0.9867093563079834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,7.325765609741211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,float16,0,0.9320533275604248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,fp8,0,0.9419306914011637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,3.872042655944824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,64,128,1,fp8,fp8,0,0.8525813420613607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,3.891615867614746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,3.5791571935017905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,float16,0,0.938202699025472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,6.75660769144694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,3.7290932337443032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,7.343685150146484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,fp8,0,0.9464800357818604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,64,128,1,fp8,fp8,0,0.8583733240763346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,float16,0,0.9461653232574463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,3.4409866333007812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,3.730229377746582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,fp8,0,0.9576373100280762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,3.732096036275228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,64,128,1,fp8,fp8,0,0.8708000183105469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,float16,0,0.5719199975331625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,3.4487040837605796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,3.749626795450846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,fp8,0,0.5868800083796183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,64,128,1,fp8,fp8,0,0.5496639808019003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,3.7549333572387695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,float16,0,0.5206613143285116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,2.0214667320251465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,fp8,0,0.5243573188781738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,3.455135981241862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,3.763429323832194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,64,128,1,fp8,fp8,0,0.4803200165430705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,2.0396745999654136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,1.8811732927958171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,1.951130708058675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,float16,0,0.5226879914601644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,fp8,0,0.5267359813054403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,64,128,1,fp8,fp8,0,0.48622934023539227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,1.9566027323404949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,1.809823989868164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,float16,0,0.5270933310190836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,fp8,0,0.533898671468099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,1.9603412946065266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,64,128,1,fp8,fp8,0,0.4901493390401204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,1.8100533485412598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,1.9678880373636882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,float16,0,0.37937064965566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,fp8,0,0.3773546616236369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,1.9637974103291829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,64,128,1,fp8,fp8,0,0.3529226779937744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,1.1432426770528157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,float16,0,0.3768320083618164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,1.9736746152242024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,1.817797342936198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,1.1452639897664387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,fp8,0,0.37718399365743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,1.0541813373565674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,64,128,1,fp8,fp8,0,0.3508373498916626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,1.13156263033549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,float16,0,0.3782293399175008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,1.1340479850769043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,1.051632006963094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,fp8,0,0.3771200180053711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,64,128,1,fp8,fp8,0,0.3525386651357015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,1.1325493653615315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,float16,0,0.3778719902038574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,fp8,0,0.3779573440551758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,1.1337066491444905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,1.0511093139648438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,1.1383306980133057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,64,128,1,fp8,fp8,0,0.3522453308105469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,1.134714682896932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,1.057861328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,float16,0,2.7949867248535156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,64,128,1,fp8,fp8,0,2.536634604136149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,fp8,0,2.812570571899414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,float16,0,2.8118985493977866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,64,128,1,fp8,fp8,0,2.5582613945007324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,fp8,0,2.833967844645182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,8.940991719563803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,float16,0,2.8501065572102866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,9.729520161946615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,9.74953587849935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,9.768117268880209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,fp8,0,2.8696158727010093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,64,128,1,fp8,fp8,0,2.604858716328939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,8.971413294474283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,9.791162490844727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,float16,0,1.5956692695617676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,fp8,0,1.6277707417805989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,64,128,1,fp8,fp8,0,1.5021440188090007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,9.82358423868815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,float16,0,1.4113759994506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,5.134592056274414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,fp8,0,1.4255040486653645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,64,128,1,fp8,fp8,0,1.2812639872233074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,5.179845492045085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,9.025525410970053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,4.750560124715169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,9.830528259277344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,4.9063520431518555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,float16,0,1.4202666282653809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,fp8,0,1.434165318806966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,64,128,1,fp8,fp8,0,1.2934613227844238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,float16,0,1.435647964477539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,4.520895957946777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,fp8,0,1.4523946444193523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,4.912698745727539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,64,128,1,fp8,fp8,0,1.31222399075826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,4.930586814880371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,float16,0,0.8346347014109293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,4.525173187255859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,4.931568145751953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,fp8,0,0.8547093073527018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,64,128,1,fp8,fp8,0,0.7927520275115967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,4.951615969340007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,float16,0,0.7501599788665771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,2.633882681528727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,fp8,0,0.7557706832885742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,64,128,1,fp8,fp8,0,0.686896006266276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,4.5524905522664385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,2.6539200146993003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,2.439098676045736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,4.967002550760905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,2.5217812856038413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,float16,0,0.7539573510487875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,fp8,0,0.7601493199666342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,64,128,1,fp8,fp8,0,0.6902986367543539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,2.5308693250020347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,float16,0,0.7604266802469889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,2.33188803990682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,fp8,0,0.768613338470459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,2.5354933738708496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,64,128,1,fp8,fp8,0,0.7001760005950928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,2.3326826095581055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,2.5434187253316245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,float16,0,0.46161067485809326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,fp8,0,0.47355735301971436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,64,128,1,fp8,fp8,0,0.44391465187072754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,2.5443785985310874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,1.3944427172342937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,float16,0,0.4185546636581421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,2.3446666399637857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,2.5574026107788086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,fp8,0,0.4230453173319499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,1.2932693163553874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,64,128,1,fp8,fp8,0,0.38953065872192383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,1.4036213556925456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,1.3359519640604656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,float16,0,0.42227200667063397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,fp8,0,0.42496001720428467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,64,128,1,fp8,fp8,0,0.3916586637496948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,1.3399413426717122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,1.2394133408864338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,float16,0,0.4264693260192871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,1.3433173497517903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,fp8,0,0.4302346706390381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,1.3465065956115723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,64,128,1,fp8,fp8,0,0.39637335141499835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,1.2412160237630208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,1.3469386100769043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,float16,0,0.30871466795603436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,fp8,0,0.3098026712735494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,64,128,1,fp8,fp8,0,0.28947200377782184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,1.3544960021972656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,0.8012959957122803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,1.2463626861572266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,float16,0,0.307861328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,0.801088015238444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,0.7419520219167074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,fp8,0,0.30829334259033203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,64,128,1,fp8,fp8,0,0.2861493428548177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,0.7944800059000651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,float16,0,0.30802132685979206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,0.7997120221455892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,0.7375786304473877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,fp8,0,0.30824534098307294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,64,128,1,fp8,fp8,0,0.2874133388201396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,0.7987573146820068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,float16,0,0.31189332405726117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,0.795034646987915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,0.7391839822133383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,fp8,0,0.3118026653925578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,64,128,1,fp8,fp8,0,0.2873973250389099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,0.7975786526997884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,0.7997706731160482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,0.7417066891988119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,float16,0,2.0668320655822754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,64,128,1,fp8,fp8,0,1.871616045633952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,fp8,0,2.081557273864746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,float16,0,2.0825440088907876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,fp8,0,2.0989813804626465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,64,128,1,fp8,fp8,0,1.891738732655843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,5.877941131591797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,5.4047896067301435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,5.903045018513997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,float16,0,2.104207992553711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,5.9133758544921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,fp8,0,2.123055934906006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,64,128,1,fp8,fp8,0,1.9228533109029133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,5.918069203694661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,float16,0,1.197493314743042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,5.433626810709636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,fp8,0,1.2211039861043294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,64,128,1,fp8,fp8,0,1.1261280377705891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,5.946741104125977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,float16,0,1.060149351755778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,3.1513439814249673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,2.9212799072265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,fp8,0,1.0710986455281575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,3.184885342915853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,5.966325124104817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,5.455365498860677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,64,128,1,fp8,fp8,0,0.9645600318908691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,2.9883572260538735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,float16,0,1.068010648091634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,fp8,0,1.0777066548665364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,64,128,1,fp8,fp8,0,0.9717013041178385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,2.9962132771809897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,2.7545973459879556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,float16,0,1.0796639919281006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,fp8,0,1.092128038406372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,3.0062081019083657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,64,128,1,fp8,fp8,0,0.9867520332336426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,3.011002540588379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,2.7569332122802734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,float16,0,0.6309866507848104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,3.0216054916381836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,fp8,0,0.6447306474049886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,64,128,1,fp8,fp8,0,0.5988373359044393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,1.6331946055094402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,float16,0,0.5670719941457113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,2.775338808695475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,3.035615921020508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,1.5127040545145671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,fp8,0,0.5714346567789713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,1.6436907450358074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,64,128,1,fp8,fp8,0,0.5196746587753296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,1.550330638885498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,float16,0,0.5687520106633505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,fp8,0,0.5738453467686971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,64,128,1,fp8,fp8,0,0.5226026773452759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,1.5577653249104817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,1.431861400604248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,float16,0,0.5755999883015951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,1.5574933687845867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,fp8,0,0.5803946654001871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,64,128,1,fp8,fp8,0,0.5290720065434774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,1.5632586479187012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,1.4348692893981934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,float16,0,0.3511360088984172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,1.5643893877665203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,fp8,0,0.3598666588465373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,64,128,1,fp8,fp8,0,0.33719468116760254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,1.5740853945414226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,0.8724160194396973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,1.446181297302246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,float16,0,0.3168213367462158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,0.8805546760559082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,0.8146666685740153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,fp8,0,0.31973334153493244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,0.8313173453013102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,64,128,1,fp8,fp8,0,0.29604266087214154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,float16,0,0.3189599911371867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,0.8348320325215658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,0.771941343943278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,fp8,0,0.32262933254241943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,64,128,1,fp8,fp8,0,0.2977813283602397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,0.8350826899210612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,float16,0,0.32412266731262207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,fp8,0,0.32656532526016235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,0.8358506361643473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,0.7748160362243652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,64,128,1,fp8,fp8,0,0.3036373257637024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,0.8401119709014893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,float16,0,0.2339573303858439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,0.8456213474273682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,0.7799093723297119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,fp8,0,0.2343519926071167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,0.5166186491648356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,64,128,1,fp8,fp8,0,0.21943465868631998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,0.5172746578852335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,fp8,0,0.2313493291536967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,float16,0,0.2341973384221395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,0.4790240128835042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,0.5123786528905233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,64,128,1,fp8,fp8,0,0.21609600385030112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,0.5148373444875082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,float16,0,0.23183466990788779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,0.47546664873758954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,fp8,0,0.23147734006245932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,float16,0,0.23417067527770996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,64,128,1,fp8,fp8,0,0.2195146679878235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,fp8,0,0.23226133982340494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,0.517136017481486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,0.513429323832194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,0.4762186606725057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,0.5141546726226807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,64,128,1,fp8,fp8,0,0.21769599119822183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,0.5181653499603271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,0.4805706739425659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,float16,0,2.7503093083699546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,64,128,1,fp8,fp8,0,2.4928266207377114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,fp8,0,2.7705065409342446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,float16,0,2.770384152730306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,fp8,0,2.789056142171224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,5.950938542683919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,64,128,1,fp8,fp8,0,2.5149973233540854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,5.472826639811198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,5.965514500935872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,5.98310915629069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,float16,0,2.809189478556315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,64,128,1,fp8,fp8,0,2.564261277516683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,6.003546396891276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,fp8,0,2.8269920349121094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,5.492842356363933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,float16,0,1.55514129002889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,fp8,0,1.585055987040202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,64,128,1,fp8,fp8,0,1.4622400601704915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,6.030117034912109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,6.043738683064778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,3.2021493911743164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,5.5364532470703125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,float16,0,1.3733493487040203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,3.228762626647949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,fp8,0,1.3828214009602864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,2.9634345372517905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,64,128,1,fp8,fp8,0,1.242517312367757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,2.985658645629883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,float16,0,1.381440003712972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,fp8,0,1.3920532862345378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,64,128,1,fp8,fp8,0,1.2524320284525554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,2.7423839569091797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,3.00106143951416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,float16,0,1.398373285929362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,3.0000480016072593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,3.007882754007975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,fp8,0,1.4100213050842285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,64,128,1,fp8,fp8,0,1.2724746863047283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,2.7488266626993814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,float16,0,0.8002666632334391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,3.0203574498494468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,fp8,0,0.8177546660105387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,64,128,1,fp8,fp8,0,0.7557066281636556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,1.634533405303955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,3.0384960174560547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,2.7746826807657876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,float16,0,0.7125919659932455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,1.5213227272033691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,1.651583989461263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,fp8,0,0.7200799783070883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,64,128,1,fp8,fp8,0,0.6487253506978353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,1.5339253743489583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,float16,0,0.717146635055542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,fp8,0,0.7221600214640299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,64,128,1,fp8,fp8,0,0.6543519894282023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,1.412783940633138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,1.5414293607076008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,1.5416053136189778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,float16,0,0.7239733537038168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,1.5450773239135742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,1.416202704111735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,fp8,0,0.7329973379770914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,64,128,1,fp8,fp8,0,0.6637333234151205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,float16,0,0.424890677134196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,1.551263968149821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,fp8,0,0.4366079966227214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,64,128,1,fp8,fp8,0,0.40567998091379803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,0.8575359980265299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,1.5583573977152507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,1.425439993540446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,float16,0,0.37867732842763263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,0.8673973083496094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,0.7996586958567301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,fp8,0,0.3814506530761719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,64,128,1,fp8,fp8,0,0.35150933265686035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,0.8073493639628092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,float16,0,0.3829546769460042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,0.7461706797281901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,0.8092106978098551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,64,128,1,fp8,fp8,0,0.353216012318929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,fp8,0,0.38625601927439374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,0.810533364613851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,float16,0,0.38788266976674396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,0.8133707046508789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,0.7482613722483317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,fp8,0,0.39159464836120605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,64,128,1,fp8,fp8,0,0.3588000138600667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,0.8159306844075521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,float16,0,0.24039999643961588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,fp8,0,0.24794133504231772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,0.4691679875055949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,0.7557173569997152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,0.47600531578063965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,0.8197279771169027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,64,128,1,fp8,fp8,0,0.23190933465957642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,0.44207998116811115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,float16,0,0.21366933981577554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,fp8,0,0.2158986727396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,0.41229331493377686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,float16,0,0.2158613403638204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,64,128,1,fp8,fp8,0,0.20334933201471964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,0.4386826753616333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,0.441861351331075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,0.44251732031504315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,fp8,0,0.21750932931900024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,64,128,1,fp8,fp8,0,0.20535467068354288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,0.4453440109888713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,float16,0,0.21935999393463135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,0.41236265500386554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,fp8,0,0.22019733985265097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,0.44308265050252277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,64,128,1,fp8,fp8,0,0.20811200141906738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,0.4482933282852173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,float16,0,0.164682666460673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,0.41646401087443036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,0.2707786758740743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.2906240026156108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,fp8,0,0.16436266899108887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,64,128,1,fp8,fp8,0,0.1543786625067393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.28990399837493896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.2871039907137553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,float16,0,0.16030933459599814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.2892213265101115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,fp8,0,0.16156267126401266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,64,128,1,fp8,fp8,0,0.15004799763361612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,0.26664533217748004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,float16,0,0.16234133640925089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,float16,0,0.16107733050982156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,fp8,0,0.1625706652800242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.2876480023066203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,64,128,1,fp8,fp8,0,0.15217600266138712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.28892266750335693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,0.2667786677678426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.28917866945266724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,fp8,0,0.16366933782895407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,64,128,1,fp8,fp8,0,0.15013333161671957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.28763200839360553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,0.2680533329645793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,float16,0,2.0343467394510903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,64,128,1,fp8,fp8,0,1.8429813385009766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,fp8,0,2.049760023752848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,float16,0,2.0474133491516113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,3.737290700276693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,3.429194768269857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,3.7503573099772134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,fp8,0,2.065845330556234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,64,128,1,fp8,fp8,0,1.8591465950012207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,3.748384157816569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,float16,0,2.0751147270202637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,3.7742932637532554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,fp8,0,2.0934826532999673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,64,128,1,fp8,fp8,0,1.8921705881754558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,3.441306749979655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,float16,0,1.1676853497823079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,3.790303866068522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,fp8,0,1.189359982808431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,64,128,1,fp8,fp8,0,1.096725304921468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,3.4704160690307617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,3.7997334798177085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,2.0481972694396973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,2.064858595530192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,float16,0,1.0320320129394531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,fp8,0,1.0413866837819417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,1.904794692993164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,64,128,1,fp8,fp8,0,0.9349013169606527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,1.893760045369466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,float16,0,1.0376213391621907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,1.9040746688842773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,1.7343467076619465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,fp8,0,1.0489919980367024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,64,128,1,fp8,fp8,0,0.9413866996765137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,1.9008480707804363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,float16,0,1.0514026482899983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,1.9116214116414387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,1.7428053220113118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,fp8,0,1.0634400049845378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,64,128,1,fp8,fp8,0,0.9559840361277262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,1.9143199920654297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,float16,0,0.6052320003509521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,fp8,0,0.6176480054855347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,64,128,1,fp8,fp8,0,0.5704906781514486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,1.0544373194376628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,1.926693280537923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,1.7610987027486165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,float16,0,0.5379573504130045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,1.0667626857757568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,0.9819466272989908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,fp8,0,0.5418986479441324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,64,128,1,fp8,fp8,0,0.49167466163635254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,0.9765600363413492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,float16,0,0.5399519999821981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,0.9820426305135092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,0.9018399715423584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,fp8,0,0.5455626646677653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,64,128,1,fp8,fp8,0,0.49615999062856037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,0.9820480346679688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,0.9892213344573975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,float16,0,0.5472106536229452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,0.9050079981486002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,fp8,0,0.5544480085372925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,64,128,1,fp8,fp8,0,0.5019359985987345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,0.9886346658070883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,fp8,0,0.33158934116363525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,float16,0,0.32442132631937665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,0.9976373513539633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,0.5649120012919108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,0.5596479972203573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,0.9109706878662109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,64,128,1,fp8,fp8,0,0.30806400378545123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,0.5219626824061075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,float16,0,0.28684266408284503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,0.5164373318354288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,fp8,0,0.28945066531499225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,64,128,1,fp8,fp8,0,0.2685386737187703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,0.5214399894078573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,0.4806453386942546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,float16,0,0.2896053393681844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,fp8,0,0.2932959993680318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,0.5199360052744547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,64,128,1,fp8,fp8,0,0.2693866689999898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,0.5228799978892008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,0.48343467712402344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,float16,0,0.29471466938654584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,fp8,0,0.29811733961105347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,0.5238720178604126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,64,128,1,fp8,fp8,0,0.2738879919052124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,0.5273546775182089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,float16,0,0.18316799402236938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,0.4875093301137288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,0.30991466840108234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,fp8,0,0.1876266598701477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,64,128,1,fp8,fp8,0,0.17666133244832358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,0.31432000796000165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,float16,0,0.16081600387891135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.28753600517908734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,0.29446399211883545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.28570665915807086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,fp8,0,0.16080533464749655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,64,128,1,fp8,fp8,0,0.15026666720708212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,0.26714134216308594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,float16,0,0.16239999731381735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,fp8,0,0.16246933738390604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.2887893319129944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,64,128,1,fp8,fp8,0,0.15380266308784485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.28916800022125244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,0.2701279918352763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,float16,0,0.16436266899108887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,0.274890661239624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.2916693290074666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.20053333044052124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,fp8,0,0.16713066895802817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,64,128,1,fp8,fp8,0,0.1584106683731079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.29340267181396484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,float16,0,0.12361066540082295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,fp8,0,0.12130666772524516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,64,128,1,fp8,fp8,0,0.1174720029036204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.20020800828933716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.18709866205851236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,float16,0,0.12307199835777283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.19984533389409384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,fp8,0,0.12142399946848552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,64,128,1,fp8,fp8,0,0.11349333326021831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.1989226738611857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,64,128,1,fp8,fp8,0,0.11316800117492676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.1848906675974528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.19671465953191122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,float16,0,0.12339199582735698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.19932266076405844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,fp8,0,0.12337066729863484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.1844693422317505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,float16,0,0.120688001314799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.19881065686543783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,fp8,0,0.12132267157236735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,64,128,1,fp8,fp8,0,0.11552000045776367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.19858133792877197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.18505599101384482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,float16,0,2.7314453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,64,128,1,fp8,fp8,0,2.4405973752339682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,fp8,0,2.7417332331339517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,float16,0,2.7428852717081704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,4.052474657694499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,3.68125851949056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,4.065754572550456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,64,128,1,fp8,fp8,0,2.467957337697347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,fp8,0,2.7585439682006836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,4.068751970926921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,float16,0,2.8299840291341147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,4.087786674499512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,fp8,0,2.834405263264974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,3.7121814092000327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,64,128,1,fp8,fp8,0,2.5085600217183432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,4.161525408426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,float16,0,1.5353546142578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,fp8,0,1.5597972869873047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,64,128,1,fp8,fp8,0,1.4413599967956543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,2.227301279703776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,4.164538701375325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,float16,0,1.3499946594238281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,3.749269485473633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,2.2457173665364585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,2.0706666310628257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,fp8,0,1.36297607421875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,64,128,1,fp8,fp8,0,1.221274693806966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,2.020554701487223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,2.0329386393229165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,float16,0,1.3608694076538086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,1.8454666137695312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,64,128,1,fp8,fp8,0,1.2329386870066326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,fp8,0,1.375482718149821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,2.0282880465189614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,2.046079953511556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,1.8601706822713215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,float16,0,1.3784747123718262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,64,128,1,fp8,fp8,0,1.2514879703521729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,fp8,0,1.3889600435892742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,2.0508480072021484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,float16,0,0.783397356669108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,fp8,0,0.7976906299591064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,1.1319200197855632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,64,128,1,fp8,fp8,0,0.7353333632151285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,2.0629547437032065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,1.8764746983846028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,float16,0,0.6926506360371908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,1.146890640258789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,1.0565706888834636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,fp8,0,0.6986933549245199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,64,128,1,fp8,fp8,0,0.6278666655222574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,1.0341333548227947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,1.0373067061106365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,float16,0,0.6980533599853516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,0.946458657582601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,1.0380053520202637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,fp8,0,0.7047893206278483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,64,128,1,fp8,fp8,0,0.6354506810506185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,float16,0,0.7050506273905436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,1.0471786657969158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,0.9550613562266032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,fp8,0,0.7123573621114095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,64,128,1,fp8,fp8,0,0.6434453328450521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,1.0499146779378254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,float16,0,0.40856532255808514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,1.05512531598409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,0.9636639753977457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,0.5903786818186442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,fp8,0,0.418287992477417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,64,128,1,fp8,fp8,0,0.38647464911142987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,0.5980533361434937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,float16,0,0.36076800028483075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,0.5532799959182739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,0.5402079820632935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,0.5362720092137655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,fp8,0,0.365013321240743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,64,128,1,fp8,fp8,0,0.3338346481323242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,float16,0,0.3641226689020793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,0.4987573226292928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,0.5414933363596598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,0.5014346837997437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,fp8,0,0.3693173329035441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,64,128,1,fp8,fp8,0,0.33609600861867267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,0.544047991434733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,float16,0,0.3709333340326945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,0.5454933245976766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,fp8,0,0.3734240134557088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,float16,0,0.22197333971659342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,64,128,1,fp8,fp8,0,0.3407200177510579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,0.5513013203938802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,0.5069226821263632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,0.32371199131011963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,0.3174293239911397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,fp8,0,0.22633065780003866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,64,128,1,fp8,fp8,0,0.21150400241216025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,0.3000906705856323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,64,128,1,fp8,fp8,0,0.18280533949534097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,float16,0,0.19332265853881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.28705066442489624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,fp8,0,0.19638933738072714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.2885706623395284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,0.26970134178797406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,float16,0,0.19524266322453818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.28985599676767987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,fp8,0,0.19732266664505005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,64,128,1,fp8,fp8,0,0.18555732568105063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.29202133417129517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,fp8,0,0.2020639975865682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.29068267345428467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,0.27313599983851117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,float16,0,0.19918400049209595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,64,128,1,fp8,fp8,0,0.18715200821558634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.29609066247940063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,0.2763146758079529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,float16,0,0.12956266601880392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.18035733699798584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,fp8,0,0.13179733355840048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,64,128,1,fp8,fp8,0,0.1260640025138855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.1836586594581604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.17333867152531943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,float16,0,0.11544533570607503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.1649066706498464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,fp8,0,0.1164479951063792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,64,128,1,fp8,fp8,0,0.10515200098355611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,64,128,1,fp8,fp8,0,0.10638933380444844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.16620266437530518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.15435199936230978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,float16,0,0.1148426632086436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.1649386684099833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,fp8,0,0.11555199821790059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.16634666919708252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.15437333782513937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,float16,0,0.11730666955312093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.16737600167592367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,fp8,0,0.11692800124486287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,64,128,1,fp8,fp8,0,0.10771200060844421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.16865599155426025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.15612266461054483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,float16,0,0.08846400181452434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.12128000458081563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.11998400092124939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,fp8,0,0.08866666754086812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,64,128,1,fp8,fp8,0,0.08401067058245341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.12101866801579793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.11994666854540507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.11313066879908244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,float16,0,0.0874826709429423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,fp8,0,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,64,128,1,fp8,fp8,0,0.08274133503437042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,64,128,1,fp8,fp8,0,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.11289067069689433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,float16,0,0.08771199981371562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.12166933218638103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,fp8,0,0.08782399694124858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.12060800194740295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.11342933773994446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,float16,0,0.08866133292516072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.12160533666610718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,fp8,0,0.08694400389989217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,64,128,1,fp8,fp8,0,0.08267199993133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.11343466242154439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,float16,0,2.0177440643310547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,64,128,1,fp8,fp8,0,1.8177439371744792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,fp8,0,2.033466657002767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,2.671056111653646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,float16,0,2.0381867090861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,2.4319307009379068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,2.6858558654785156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,2.681669235229492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,fp8,0,2.0471040407816568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,64,128,1,fp8,fp8,0,1.8331786791483562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,2.448655923207601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,float16,0,2.0853439966837564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,2.6943785349527993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,fp8,0,2.0823465983072915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,64,128,1,fp8,fp8,0,1.8618186314900715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,2.7262452443440757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,float16,0,1.153061310450236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,2.4797226587931314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,fp8,0,1.1704479853312175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,2.732773462931315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,64,128,1,fp8,fp8,0,1.0824586550394695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,1.4953546524047852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,1.5111573537190754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,float16,0,1.0153066317240398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,1.3943999608357747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,fp8,0,1.0231893062591553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,1.3435680071512859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,64,128,1,fp8,fp8,0,0.9202079772949219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,1.2276373704274495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,1.3527894020080566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,float16,0,1.02182936668396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,fp8,0,1.031546672185262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,64,128,1,fp8,fp8,0,0.9262773195902506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,1.3518187204996746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,1.3625013033548992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,float16,0,1.0360960165659587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,1.2353973388671875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,1.3680639266967773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,fp8,0,1.045082648595174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,64,128,1,fp8,fp8,0,0.9402773380279541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,float16,0,0.5908639828364054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,1.377423922220866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,0.7652053038279215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,fp8,0,0.6010133425394694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,1.2527893384297688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,64,128,1,fp8,fp8,0,0.5554666519165039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,0.7768692970275879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,0.7163999875386556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,float16,0,0.5210880041122437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,fp8,0,0.5266826550165812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,0.6876160303751627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,64,128,1,fp8,fp8,0,0.4759039878845215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,0.6955200036366781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,0.693669319152832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,0.6346933444341024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,float16,0,0.524885336558024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,fp8,0,0.5307999849319458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,64,128,1,fp8,fp8,0,0.47933868567148846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,0.6978240013122559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,0.7011093298594157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,0.6395466725031534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,float16,0,0.5325013399124146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,fp8,0,0.538047989209493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,64,128,1,fp8,fp8,0,0.4858986536661784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,fp8,0,0.31772265831629437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,float16,0,0.3104959925015767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,0.7069013118743896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,0.645583987236023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,0.402074654897054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,64,128,1,fp8,fp8,0,0.2937120000521342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,0.4064106543858846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,0.3781813383102417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,float16,0,0.27161065737406415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,0.35930665334065753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,fp8,0,0.2736533284187317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,64,128,1,fp8,fp8,0,0.2526879906654358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.3619413375854492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,0.3346879879633586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,float16,0,0.2736799915631612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,0.33983465035756427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,0.3615200122197469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,fp8,0,0.27703466018040973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,64,128,1,fp8,fp8,0,0.2564693291982015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,0.36562132835388184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,float16,0,0.28005866209665936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,0.3714933395385742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,0.3670026858647664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,fp8,0,0.2815306584040324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,64,128,1,fp8,fp8,0,0.26079465945561725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,float16,0,0.16886399189631143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,0.3426400025685628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,float16,0,0.1458293298880259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.21928532918294272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.19241599241892496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,64,128,1,fp8,fp8,0,0.16321600476900736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.19570666551589966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.18052266041437784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.22223466634750366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.20759467283884683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,fp8,0,0.14735466241836548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,64,128,1,fp8,fp8,0,0.1358453333377838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,float16,0,0.14756799737612405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.19502933820088705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,fp8,0,0.14899200201034546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,64,128,1,fp8,fp8,0,0.13802666465441385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.1974560022354126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.18425599733988443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,float16,0,0.14897599816322327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.1981173356374105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,fp8,0,0.15069333712259927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,64,128,1,fp8,fp8,0,0.1439520021279653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.19940267006556192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.18863467375437418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,float16,0,0.09615466992060344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.12609600027402243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,fp8,0,0.0981333355108897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,64,128,1,fp8,fp8,0,0.0965119997660319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.12782399853070578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.1234826644261678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,float16,0,0.08646399776140849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.11684266726175944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,fp8,0,0.08756267031033833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,64,128,1,fp8,fp8,0,0.08182933429876964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,fp8,0,0.0883893370628357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.1179146667321523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.10910933216412862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,float16,0,0.08687466382980347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.1181653340657552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.11884799599647522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,64,128,1,fp8,fp8,0,0.08211733400821686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.11947733163833618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.10942400495211284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,float16,0,0.08684266606966655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,float16,0,0.06818133095900218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,fp8,0,0.0886346697807312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,64,128,1,fp8,fp8,0,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.11885866522789001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.088837335507075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.10924266775449117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.0886346697807312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,fp8,0,0.06670400003592174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,64,128,1,fp8,fp8,0,0.0631039987007777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.08251733581225078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,float16,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.08718933661778767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,fp8,0,0.06666133304437001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,64,128,1,fp8,fp8,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.08758399883906047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.08275733391443889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,float16,0,0.066170667608579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.08814932902654012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,64,128,1,fp8,fp8,0,0.06262933214505513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.08854400118192036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,float16,0,0.068122665087382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.08681066830952962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,fp8,0,0.06666666766007741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,64,128,1,fp8,fp8,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.08825066685676575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,float16,0,2.4243040084838867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,fp8,0,2.416144053141276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,64,128,1,fp8,fp8,0,2.3585546811421714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,2.8270187377929688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,float16,0,2.4399627049764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,2.8286027908325195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,2.759685198465983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,2.8640213012695312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,fp8,0,2.417311986287435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,64,128,1,fp8,fp8,0,2.40228271484375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,2.837434768676758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,2.786677360534668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,float16,0,2.4913013776143393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,fp8,0,2.428778648376465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,64,128,1,fp8,fp8,0,2.3864320119222007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,2.8593918482462564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,float16,0,1.318346659342448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,fp8,0,1.3036746978759766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,1.5360213915507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,64,128,1,fp8,fp8,0,1.3035146395365398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,2.8428961435953775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,2.7954934438069663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,1.5095733006795247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,1.5140320460001628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,float16,0,1.219055970509847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,fp8,0,1.2165013154347737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,1.4282026290893555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,64,128,1,fp8,fp8,0,1.1408373514811199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,1.4260907173156738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,1.3338932991027832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,float16,0,1.221717357635498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,1.4252266883850098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,fp8,0,1.2191680272420247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,64,128,1,fp8,fp8,0,1.1730079650878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,1.42847474416097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,1.3813279469807942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,float16,0,1.2281920115152996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,fp8,0,1.225167989730835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,1.4393653869628906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,64,128,1,fp8,fp8,0,1.173957347869873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,1.434597333272298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,float16,0,0.6654026508331299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,0.7801226774851481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,1.3699146906534831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,fp8,0,0.6534133354822794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,64,128,1,fp8,fp8,0,0.6443306605021158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,0.7664746443430582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,0.7512586911519369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,float16,0,0.6188426812489828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,0.7259573141733805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,fp8,0,0.6192213296890259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,64,128,1,fp8,fp8,0,0.5720426638921102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,0.7239733537038168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,0.6766400337219238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,float16,0,0.6181706587473551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,0.7246452967325846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,fp8,0,0.6193333466847738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,0.7264906565348307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,64,128,1,fp8,fp8,0,0.5765599807103475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,0.6775253613789877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,float16,0,0.6248319943745931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,0.732863982518514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,fp8,0,0.6223040024439493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,64,128,1,fp8,fp8,0,0.5844586690266927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,0.7320960362752279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,float16,0,0.3434293270111084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,0.6877013047536215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,0.400490681330363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,fp8,0,0.33661333719889325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,64,128,1,fp8,fp8,0,0.33194132645924884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,0.3961333433787028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,0.3861120144526164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,float16,0,0.31944000720977783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,0.37293867270151776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,fp8,0,0.3172373374303182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,64,128,1,fp8,fp8,0,0.29552000761032104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,0.3725493351618449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,0.3475786844889323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,float16,0,0.3190293312072754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,0.37542398770650226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,fp8,0,0.31783999999364215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,64,128,1,fp8,fp8,0,0.29612799485524494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,0.3731146653493245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,0.3488159974416097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,64,128,1,fp8,fp8,0,0.30062933762868244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,float16,0,0.32205865780512494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,0.3791840076446533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,fp8,0,0.3209013342857361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,0.3776533206303914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,float16,0,0.18012799819310507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,0.35396798451741535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.21185600757598877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,fp8,0,0.17819199959437051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,64,128,1,fp8,fp8,0,0.17584532499313354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.20822399854660034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,0.20383999745051065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,float16,0,0.16501333316167197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.195743997891744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,64,128,1,fp8,fp8,0,0.15648000439008078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,fp8,0,0.1667840083440145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.1962719957033793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.1850879987080892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,float16,0,0.1667520006497701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.19565333922704062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,float16,0,0.16911466916402182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,fp8,0,0.16576000054677328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,64,128,1,fp8,fp8,0,0.1581546664237976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.1965013345082601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.18544000387191772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.19874133666356406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,fp8,0,0.17028266191482544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,64,128,1,fp8,fp8,0,0.16050133109092712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.1984106699625651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.18959466616312662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,float16,0,0.10314666231473286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,float16,0,0.0925600032011668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.11899733543395996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,fp8,0,0.10086933771769206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,64,128,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.11626133322715759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.11682666341463725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.10930666327476501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,fp8,0,0.09363733728726704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,64,128,1,fp8,fp8,0,0.08661333719889323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.10917333761850993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.10087999701499939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,float16,0,0.09284266829490662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.10890666643778484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.10898133118947347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,fp8,0,0.09387200077374776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,64,128,1,fp8,fp8,0,0.085125337044398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.10931733250617981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.10225600004196167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,float16,0,0.09490133325258891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.06764799853165944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,fp8,0,0.09317333499590556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,64,128,1,fp8,fp8,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.0656160016854604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.10913599530855815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.10286399722099304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,float16,0,0.0558240016301473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,64,128,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,float16,0,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.06484266618887584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,fp8,0,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,64,128,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.06509333352247874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,float16,0,0.056159997979799904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.0662613312403361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,fp8,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,64,128,1,fp8,fp8,0,0.05190399785836538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.06604266663392384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.06504000226656596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,float16,0,0.054431999723116554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,fp8,0,0.05608533322811127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,64,128,1,fp8,fp8,0,0.05169600248336792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.06216000020503998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,64,128,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,float16,0,0.03845866769552231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,64,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,float16,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,float16,0,0.03693866729736328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,fp8,0,0.037791999677817024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,64,128,1,fp8,fp8,0,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.04458666841189066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,64,128,1,fp8,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,float16,0,2.339066664377848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,2.358895937601725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,fp8,0,2.3339573542277017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,64,128,1,fp8,fp8,0,2.2776427268981934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,2.359322706858317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,2.3063626289367676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,float16,0,2.353114604949951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,2.3757707277933755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,fp8,0,2.3383092880249023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,64,128,1,fp8,fp8,0,2.317637284596761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,2.370528062184652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,2.3459626833597818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,float16,0,2.3913493156433105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,2.422442595163981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,fp8,0,2.3686827023824057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,64,128,1,fp8,fp8,0,2.3206559816996255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,float16,0,1.301263968149821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,1.292453368504842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,fp8,0,1.2641226450602214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,64,128,1,fp8,fp8,0,1.2449599901835124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,2.385477383931478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,2.346874713897705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,1.294368028640747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,1.2738293011983235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,float16,0,1.1772960027058919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,1.190885305404663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,fp8,0,1.179754654566447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,64,128,1,fp8,fp8,0,1.0978773434956868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,1.1164320309956868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,1.1866079966227214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,float16,0,1.1819519996643066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,1.1940746307373047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,fp8,0,1.175162633260091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,64,128,1,fp8,fp8,0,1.1593600114186604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,1.1879573663075764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,1.1698933442433674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,float16,0,1.1920159657796223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,1.2016639709472656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,64,128,1,fp8,fp8,0,1.1286239624023438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,fp8,0,1.1842400232950847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,float16,0,0.6459840138753256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,1.1941386858622234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,0.6545653343200684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,1.1599146525065105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,fp8,0,0.6331146558125814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,64,128,1,fp8,fp8,0,0.6287200053532919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,0.645957350730896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,0.6338880062103271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,float16,0,0.5995519955952963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,0.6022186676661173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,0.602346658706665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,fp8,0,0.5961066484451294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,64,128,1,fp8,fp8,0,0.5539946556091309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,0.6055200099945068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,0.5646560192108154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,float16,0,0.6004319985707601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,fp8,0,0.599125345547994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,64,128,1,fp8,fp8,0,0.5581599871317545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,0.6047679980595907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,0.5633013248443604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,float16,0,0.6067999998728434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,0.6121866703033447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,0.6093973318735758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,fp8,0,0.6015573342641195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,64,128,1,fp8,fp8,0,0.566645344098409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,float16,0,0.33214932680130005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,0.576197346051534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,0.33658134937286377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,fp8,0,0.3267199993133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,64,128,1,fp8,fp8,0,0.3224053382873535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,0.3322346607844035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,0.32682667175928753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,float16,0,0.3071733315785726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.3110453287760417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,fp8,0,0.30634133021036786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,64,128,1,fp8,fp8,0,0.28590933481852215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.3118773301442464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.3099840084711711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.2904053330421448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,float16,0,0.3080480098724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,fp8,0,0.30952000617980957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,64,128,1,fp8,fp8,0,0.28696000576019287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.3118026653925578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.29045865933100384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,float16,0,0.31246399879455566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.3155733346939087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.2951093316078186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,fp8,0,0.31142934163411456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,64,128,1,fp8,fp8,0,0.2911840081214905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.31379733482996625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,float16,0,0.1752799948056539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.178874671459198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,fp8,0,0.17291200160980225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,64,128,1,fp8,fp8,0,0.1705333391825358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.17700799306233725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,64,128,1,fp8,fp8,0,0.15039466818173727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.17354132731755575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,float16,0,0.16062933206558228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.16341867049535116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,fp8,0,0.16154666741689047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.1625599960486094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.15371732910474142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,float16,0,0.16164799531300864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.163674662510554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,fp8,0,0.1623093287150065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,64,128,1,fp8,fp8,0,0.15268266201019287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,fp8,0,0.16309866309165955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.1641973356405894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.15437333782513937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,float16,0,0.16426666577657065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.164901336034139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,64,128,1,fp8,fp8,0,0.15429332852363586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.16606400410334268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.15796800454457602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.10053867101669312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,float16,0,0.09987733761469524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.10115200281143188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,fp8,0,0.09877333045005798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,64,128,1,fp8,fp8,0,0.0988106628259023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.09215466181437175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.09943999846776326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,float16,0,0.0901759962240855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,float16,0,0.09065600236256917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.09297600388526917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,fp8,0,0.0909440020720164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,64,128,1,fp8,fp8,0,0.08372267087300618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.08503466844558716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.09115200241406758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,fp8,0,0.09067199627558391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,fp8,0,0.092031995455424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,64,128,1,fp8,fp8,0,0.08246933420499165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.09134933352470398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.08462400237719218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,float16,0,0.09081600109736125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.09326933821042378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,64,128,1,fp8,fp8,0,0.08433066805203755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.0913759966691335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.08657067020734151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,float16,0,0.055717334151268005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.056202664971351624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,fp8,0,0.05586666862169901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,64,128,1,fp8,fp8,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,64,128,1,fp8,fp8,0,0.054431999723116554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.05646933118502299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.05417599777380625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,float16,0,0.05342400074005127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,fp8,0,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.05120533208052317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,float16,0,0.05351999898751577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.054154664278030396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.05455466608206431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,fp8,0,0.053674668073654175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,64,128,1,fp8,fp8,0,0.04960533479849497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.05547733108202616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,float16,0,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,fp8,0,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,64,128,1,fp8,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.051354666550954185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,float16,0,0.03717333326737086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.03749866783618927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,64,128,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,float16,0,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.03670933345953623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,64,128,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,float16,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,fp8,0,0.03589866558710734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,64,128,1,fp8,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.03695466617743174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,float16,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,fp8,0,0.036687999963760376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,64,128,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,float16,0,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.026127999027570088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,fp8,0,0.026698666314284008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,float16,0,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,64,128,1,fp8,fp8,0,0.024288001159826916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,fp8,0,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,fp8,0,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,float16,0,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,float16,0,1.0939093430836995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,1.0708373387654622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,fp8,0,1.0879039764404297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,64,128,1,fp8,fp8,0,1.0298186937967937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,1.0690560340881348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,1.007541338602702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,float16,0,1.0965332984924316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,1.0733439922332764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,fp8,0,1.0933067003885906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,64,128,1,fp8,fp8,0,1.070522705713908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,1.071781317392985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,1.0404693285624187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,float16,0,1.1070880095163982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,1.0856160322825115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,fp8,0,1.1028532981872559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,64,128,1,fp8,fp8,0,1.0570293267567952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,float16,0,0.6014986832936605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,1.0797333717346191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,0.5916266838709513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,1.033573309580485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,fp8,0,0.589850664138794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,64,128,1,fp8,fp8,0,0.5868213176727295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,0.5798933506011963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,0.5755413373311361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,float16,0,0.5536426703135172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,0.5422879854838053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,fp8,0,0.5519680182139078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,64,128,1,fp8,fp8,0,0.5177119970321655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,0.5395520130793253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,0.5039199988047282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,float16,0,0.5554399887720743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,0.5432213147481283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,fp8,0,0.5546506643295288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,64,128,1,fp8,fp8,0,0.5177706480026245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,0.5423680146535238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,0.508080005645752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,float16,0,0.5614720185597738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,0.5513546864191691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,0.5119040012359619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,float16,0,0.3081013361612956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,fp8,0,0.5598239898681641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,64,128,1,fp8,fp8,0,0.5243573188781738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,64,128,1,fp8,fp8,0,0.3027519981066386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,0.5486026604970297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,0.2962080041567485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,0.3037066658337911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,fp8,0,0.30288533369700116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,0.29872000217437744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,float16,0,0.28356800476710003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.25924267371495563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.2779573400815328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,fp8,0,0.283786674340566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,64,128,1,fp8,fp8,0,0.26335465908050537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.2783466577529907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,float16,0,0.2842666705449422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.27992000182469684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,fp8,0,0.2853279908498128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,64,128,1,fp8,fp8,0,0.26547733942667645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.2787359952926636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.26095465819040936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,float16,0,0.2890613277753194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.28313066562016803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,fp8,0,0.28751999139785767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,64,128,1,fp8,fp8,0,0.2711679935455322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.28201067447662354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.26667733987172443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,float16,0,0.16295466820398966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.15996266404787698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,float16,0,0.15031466881434122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,64,128,1,fp8,fp8,0,0.16197333733240762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.1581013302008311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.1590986649195353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.14706666270891824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,float16,0,0.14995200435320535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,fp8,0,0.14936000108718872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,64,128,1,fp8,fp8,0,0.13979732990264893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,64,128,1,fp8,fp8,0,0.1413706640402476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.1462399959564209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.1360373298327128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.1474133332570394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,fp8,0,0.1513706644376119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,64,128,1,fp8,fp8,0,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.14682666460673013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.13802666465441385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.15004266301790872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,float16,0,0.15244266390800476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,fp8,0,0.1539466679096222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.14897066354751587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.14070399602254233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,float16,0,0.0936906635761261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.09095999598503113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,fp8,0,0.09198932846387227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,64,128,1,fp8,fp8,0,0.09332266449928284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,64,128,1,fp8,fp8,0,0.07844266792138417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.09008000294367473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.07681066791216533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.09230400125185649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,float16,0,0.08628799517949422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.08297599852085114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,64,128,1,fp8,fp8,0,0.0790719985961914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.08402666449546814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,fp8,0,0.08595200379689534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.08306666711966197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,float16,0,0.08561600248018901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.08383466800053914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,fp8,0,0.0846666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,float16,0,0.08710400263468425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.08338666955629985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,64,128,1,fp8,fp8,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.0844693382581075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.07866133252779643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,float16,0,0.05133866767088572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.04933866858482361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,fp8,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,64,128,1,fp8,fp8,0,0.046240001916885376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,64,128,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.05132266879081726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.04910400013128916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,float16,0,0.0496373325586319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,fp8,0,0.049173335234324135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,float16,0,0.04970666766166687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.048021331429481506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,fp8,0,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,64,128,1,fp8,fp8,0,0.0462666650613149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,64,128,1,fp8,fp8,0,0.04729066789150238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.04934933284918467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,float16,0,0.04994666576385498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.048538664976755776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,fp8,0,0.05060266455014547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.04836800197760264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,64,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,64,128,1,fp8,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,64,128,1,fp8,fp8,0,0.03258133431275686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,float16,0,0.03454933315515518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,64,128,1,fp8,fp8,0,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,float16,0,0.0262773334980011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,fp8,0,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,64,128,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.023957334458827972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,float16,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,64,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,64,128,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,float16,0,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,fp8,0,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,64,128,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.020560000091791153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,float16,0,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,float16,0,0.020090666910012562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,float16,0,0.582805315653483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,0.5821386575698853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,fp8,0,0.5806080102920532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,64,128,1,fp8,fp8,0,0.5576853354771932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,0.5800373156865438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,0.5568426847457886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,float16,0,0.5842080116271973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,0.5847413142522176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,fp8,0,0.5796853303909302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,64,128,1,fp8,fp8,0,0.5670239925384521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,0.5829013188680013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,0.5676266749699911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,float16,0,0.5950133403142294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,0.5940479834874471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,fp8,0,0.5905173222223917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,0.32369067271550495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,64,128,1,fp8,fp8,0,0.571669340133667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,0.5855520168940226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,64,128,1,fp8,fp8,0,0.3209386666615804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,float16,0,0.3235466678937276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,0.5718346834182739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,fp8,0,0.31859733661015827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,0.31859733661015827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,0.3208640019098918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,float16,0,0.2983306646347046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.2975253264109294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,fp8,0,0.2972213427225749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,64,128,1,fp8,fp8,0,0.28460800647735596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.2968906760215759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,64,128,1,fp8,fp8,0,0.28756799300511676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.28386666377385456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,float16,0,0.2987839976946513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.2982293367385864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,float16,0,0.3033173282941182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,fp8,0,0.29902400573094684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.29816534121831256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.28843732674916583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.3033439914385478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,fp8,0,0.3035839994748433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,64,128,1,fp8,fp8,0,0.29134400685628253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.3014400005340576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.29129066069920856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.16660799582799277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.16885334253311157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.15481066703796387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,float16,0,0.15573867162068686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,float16,0,0.16938134034474692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.16890132427215576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,fp8,0,0.16685332854588827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,64,128,1,fp8,fp8,0,0.16852800051371256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,fp8,0,0.1562879979610443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,64,128,1,fp8,fp8,0,0.14819199840227762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.1551040013631185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,float16,0,0.15613866845766702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.1562933325767517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,fp8,0,0.15460800131162009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,64,128,1,fp8,fp8,0,0.14994666973749796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.15636799732844034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.1500320037206014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,float16,0,0.15922666589419046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.1585813363393148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,fp8,0,0.1585706671079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.09452266494433086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,64,128,1,fp8,fp8,0,0.15175466736157736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.15820800264676413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.1529759963353475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,float16,0,0.0946720043818156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,float16,0,0.08676266670227051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,fp8,0,0.09186666210492452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,fp8,0,0.08687466382980347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,64,128,1,fp8,fp8,0,0.09712533156077068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.09506666660308838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,float16,0,0.0872213343779246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.08744532863299052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.08642133076985677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,64,128,1,fp8,fp8,0,0.08202133576075236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.08609066406885783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.08210666477680206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,fp8,0,0.08599467078844707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,64,128,1,fp8,fp8,0,0.0825973351796468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.08692266543706258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.08205333352088928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,float16,0,0.08681066830952962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.08335999647776286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.08796266714731853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,fp8,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,64,128,1,fp8,fp8,0,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.08693333466847737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.05338666836420695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,float16,0,0.05387733379999796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,fp8,0,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,64,128,1,fp8,fp8,0,0.05373866856098175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.052709331115086876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,float16,0,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.05081599950790405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,fp8,0,0.051167999704678856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,64,128,1,fp8,fp8,0,0.04930133124192556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,64,128,1,fp8,fp8,0,0.04906133313973745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,float16,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,fp8,0,0.051738664507865906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.05166399975617727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,float16,0,0.05143466591835022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.05133333305517832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,fp8,0,0.051594664653142296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,64,128,1,fp8,fp8,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,float16,0,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.03186666717131933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,64,128,1,fp8,fp8,0,0.031850665807724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.03290133426586787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.032885332902272545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,float16,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,64,128,1,fp8,fp8,0,0.029861333469549816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,fp8,0,0.03190933416287104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,fp8,0,0.03292799989382426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,64,128,1,fp8,fp8,0,0.03044266750415166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,float16,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.032399999598662056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,64,128,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,64,128,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,64,128,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,64,128,1,fp8,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.4124586582183838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,float16,0,0.41412798563639325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,fp8,0,0.41225600242614746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,64,128,1,fp8,fp8,0,0.38706668217976886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.4123893181482951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.3878186543782552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,float16,0,0.4153706630071004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.4148266712824504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,fp8,0,0.41253332297007245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,64,128,1,fp8,fp8,0,0.391759991645813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.41386131445566815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.41790934403737384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.3919893503189087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,float16,0,0.41923201084136963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,fp8,0,0.41682668526967365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,64,128,1,fp8,fp8,0,0.39508267243703205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.4166933298110962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,float16,0,0.22757333517074585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.3945759932200114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.2206719915072123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.22613332668940225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,fp8,0,0.22401599089304605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,64,128,1,fp8,fp8,0,0.21997332572937012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.22419732809066772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,float16,0,0.21394666035970053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.21377599239349365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,fp8,0,0.21382933855056763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,64,128,1,fp8,fp8,0,0.19955732425053915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.21185600757598877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.19953600565592447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,float16,0,0.21270400285720825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.21382933855056763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,fp8,0,0.2127466599146525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,64,128,1,fp8,fp8,0,0.2020960052808126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.21374932924906412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.20197333892186484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,float16,0,0.21451733509699503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.2156160076459249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,fp8,0,0.2153759996096293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,64,128,1,fp8,fp8,0,0.20562666654586792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.21444799502690634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.20458134015401205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,float16,0,0.12108266353607178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.12128532926241557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,fp8,0,0.12059733271598816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,64,128,1,fp8,fp8,0,0.12131733695665996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.12091733018557231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.12133333086967468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,float16,0,0.1150986651579539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.11342933773994446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,fp8,0,0.1141866644223531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,64,128,1,fp8,fp8,0,0.10634666681289673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.11321600278218587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.10717333356539409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,float16,0,0.11476266384124756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.11437333623568217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,fp8,0,0.11360533038775127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.11517332990964253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,64,128,1,fp8,fp8,0,0.10707199573516846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.11315199732780457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.10589866836865743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,float16,0,0.11467732985814412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,fp8,0,0.11335466305414836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,64,128,1,fp8,fp8,0,0.10707199573516846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.11513066291809082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.1070240040620168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,float16,0,0.06607999900976817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.06596266726652782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,fp8,0,0.06605866551399231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,64,128,1,fp8,fp8,0,0.06454933186372121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.06629866858323415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.06603733201821645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,float16,0,0.06392000118891399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,float16,0,0.06404266754786174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,fp8,0,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,64,128,1,fp8,fp8,0,0.06053866446018219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.06202666461467743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.06493333478768666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,fp8,0,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,64,128,1,fp8,fp8,0,0.06100266675154368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.06426133215427399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,float16,0,0.065610667069753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,fp8,0,0.06607999900976817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,64,128,1,fp8,fp8,0,0.06126933296521505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.0639519989490509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.040789333482583366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,64,128,1,fp8,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,64,128,1,fp8,fp8,0,0.03800000001986822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,float16,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,float16,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,64,128,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.03809600075085958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,float16,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.03932799895604452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,64,128,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,64,128,1,fp8,fp8,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,fp8,0,0.025920001169045765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,64,128,1,fp8,fp8,0,0.026943999032179516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,float16,0,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,64,128,1,fp8,fp8,0,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,float16,0,0.0206986665725708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,64,128,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.019808000574509304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,fp8,0,0.020314666132132213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,float16,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,64,128,1,fp8,fp8,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.32849599917729694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,float16,0,0.33052800099054974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,fp8,0,0.32977600892384845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,64,128,1,fp8,fp8,0,0.30399467547734577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.32871466875076294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,fp8,0,0.32849599917729694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,64,128,1,fp8,fp8,0,0.3063146670659383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.30502933263778687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,float16,0,0.32849599917729694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.32970666885375977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.32847466071446735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,fp8,0,0.330186665058136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.3070346713066101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,float16,0,0.331221342086792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.33218133449554443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,64,128,1,fp8,fp8,0,0.30989332993825275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,fp8,0,0.17896533012390137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.32983465989430744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,float16,0,0.17976532379786173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.309717337290446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.17898666858673096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,64,128,1,fp8,fp8,0,0.1733013391494751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.1787359913190206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.17416000366210938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,float16,0,0.17265599966049194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.1725119948387146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,fp8,0,0.17248533169428507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,64,128,1,fp8,fp8,0,0.15849600235621134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.17145599921544394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.15929067134857178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.17083199818929037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,float16,0,0.17287466923395792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.1717546582221985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,fp8,0,0.17251733938852945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,fp8,0,0.1724053422609965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,64,128,1,fp8,fp8,0,0.15894400080045065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,64,128,1,fp8,fp8,0,0.1593173344930013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.15948800245920816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.15896000464757284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,float16,0,0.17324266831080118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.17271999518076578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.172325332959493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,float16,0,0.09411733349164327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.09317866961161296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,fp8,0,0.09479999542236328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,64,128,1,fp8,fp8,0,0.08924266695976257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.09406933188438416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,64,128,1,fp8,fp8,0,0.0864533285299937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.08992532889048259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,float16,0,0.09196266531944275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.09257066249847412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,fp8,0,0.09127466877301534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.09152000149091084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.08545066912968953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,float16,0,0.09267200032869975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.09082667032877605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,fp8,0,0.09202133615811665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,64,128,1,fp8,fp8,0,0.08594133456548055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,fp8,0,0.09257066249847412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.09265599648157756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.08624000350634257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,float16,0,0.09197866916656494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.09225599964459737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,fp8,0,0.055125330885251365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,64,128,1,fp8,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.09172800183296204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,float16,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,64,128,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,float16,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.05372266471385956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,fp8,0,0.053445334235827126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,64,128,1,fp8,fp8,0,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.05268266797065735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,64,128,1,fp8,fp8,0,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.05102399984995524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,float16,0,0.05298133194446564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,64,128,1,fp8,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.03603733330965042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,64,128,1,fp8,fp8,0,0.033914667864640556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,64,128,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,float16,0,0.03521066655715307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,64,128,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,64,128,1,fp8,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,64,128,1,fp8,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,64,128,1,fp8,fp8,0,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,64,128,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,64,128,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,float16,0,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,64,128,1,fp8,fp8,0,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,float16,0,0.2868799964586894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.286901334921519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,fp8,0,0.28754132986068726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,64,128,1,fp8,fp8,0,0.26377065976460773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.28707200288772583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.2648213307062785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,64,128,1,fp8,fp8,0,0.2662079930305481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,float16,0,0.2874506711959839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.2874880035718282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,fp8,0,0.28724799553553265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.28758400678634644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.26478399833043414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,float16,0,0.28727465867996216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.2871466676394145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,fp8,0,0.287882665793101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,64,128,1,fp8,fp8,0,0.2666986584663391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.28731733560562134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,float16,0,0.15094932913780212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.26637333631515503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.1506186624368032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,fp8,0,0.1502346694469452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,64,128,1,fp8,fp8,0,0.14285332957903543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.15043200055758157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.14421332875887552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,float16,0,0.14802133043607077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.1483626663684845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.13987200458844504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,fp8,0,0.14844800035158792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,64,128,1,fp8,fp8,0,0.13946666320165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.14824533462524414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,float16,0,0.1493333379427592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.14870933691660562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.1482080022493998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,fp8,0,0.14833066860834757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,64,128,1,fp8,fp8,0,0.1402773360411326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.13981866836547852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,float16,0,0.150026669104894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.14870933691660562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,fp8,0,0.14829867084821066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,64,128,1,fp8,fp8,0,0.13986666997273764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.1500640014807383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.07831466694672902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.1405173341433207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,float16,0,0.08243200182914734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,fp8,0,0.08106133341789246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,64,128,1,fp8,fp8,0,0.07727466523647308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.08140266438325246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,float16,0,0.08070933322111766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,float16,0,0.08057599763075511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.08140799899895985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,fp8,0,0.08131200075149536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,64,128,1,fp8,fp8,0,0.07632533212502797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,64,128,1,fp8,fp8,0,0.07735466460386912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.0806933343410492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,fp8,0,0.08081066608428955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,64,128,1,fp8,fp8,0,0.07730666796366374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.08051733175913493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.08109866579373677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,float16,0,0.049733335773150124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.07628799974918365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,float16,0,0.08236266672611237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.08118399977684021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.07652799785137177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,64,128,1,fp8,fp8,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.04827199876308441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.04701333244641622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,float16,0,0.05013866722583771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,fp8,0,0.04869333406289419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,64,128,1,fp8,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.04877333343029022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,64,128,1,fp8,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,64,128,1,fp8,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,float16,0,0.04766400158405304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.04867733518282572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,fp8,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,64,128,1,fp8,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.031125334401925404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,64,128,1,fp8,fp8,0,0.030026666820049286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,64,128,1,fp8,fp8,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.031109333038330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.029829333225886028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,float16,0,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.030554667115211487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.03028800090154012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,float16,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,64,128,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,float16,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,64,128,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,64,128,1,fp8,fp8,0,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,float16,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,64,128,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,64,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,64,128,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,float16,0,2.358837286631266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,64,128,1,fp8,fp8,0,2.1629600524902344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,fp8,0,2.374255975087484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,float16,0,2.3765172958374023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,fp8,0,2.3923519452412925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,64,128,1,fp8,fp8,0,2.183429400126139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,float16,0,2.410389264424642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,fp8,0,2.428805351257324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,13.2096799214681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,14.297045389811197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,14.296122233072916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,14.334587097167969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,64,128,1,fp8,fp8,0,2.224986712137858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,13.237648010253906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,float16,0,1.3735520044962566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,14.31722640991211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,fp8,0,1.4028266270955403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,64,128,1,fp8,fp8,0,1.308736006418864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,14.370501200358072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,float16,0,1.2240853309631348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,fp8,0,1.233690659205119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,64,128,1,fp8,fp8,0,1.1254613399505615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,7.449450810750325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,6.878639856974284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,7.488613128662109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,float16,0,1.2302026748657227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,7.254090627034505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,fp8,0,1.2417866388956706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,64,128,1,fp8,fp8,0,1.133951981862386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,13.254805246988932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,14.402549743652344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,6.6989491780598955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,float16,0,1.2462773323059082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,7.250677108764648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,fp8,0,1.2597706317901611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,64,128,1,fp8,fp8,0,1.1542080243428547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,float16,0,0.7501599788665771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,7.254757563273112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,fp8,0,0.7692586580912272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,64,128,1,fp8,fp8,0,0.7229706446329752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,6.710821151733398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,7.28004264831543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,float16,0,0.6817226409912109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,3.851210594177246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,7.281263987223308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,fp8,0,0.6872426668802897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,64,128,1,fp8,fp8,0,0.6332586606343588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,6.723781585693359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,7.295935948689778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,3.560208002726237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,3.8717546463012695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,float16,0,0.6848159631093343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,3.755093256632487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,fp8,0,0.6909600098927816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,64,128,1,fp8,fp8,0,0.6389866669972738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,float16,0,0.693733294804891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,3.761119842529297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,fp8,0,0.6997333367665609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,3.4765332539876304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,64,128,1,fp8,fp8,0,0.6482293208440145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,3.7535839080810547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,float16,0,0.49833067258199054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,3.4778451919555664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,3.7681760787963867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,fp8,0,0.4984906514485677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,64,128,1,fp8,fp8,0,0.46563732624053955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,3.768261273701986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,float16,0,0.49807465076446533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,2.109872023264567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,fp8,0,0.49807465076446533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,3.4831040700276694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,3.775045394897461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,64,128,1,fp8,fp8,0,0.4657813310623169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,1.9397759437561035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,2.1059679985046387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,2.0938720703125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,float16,0,0.49668800830841064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,fp8,0,0.49878935019175213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,64,128,1,fp8,fp8,0,0.4654719829559326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,1.9372480710347493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,2.096010684967041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,float16,0,0.49666134516398114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,fp8,0,0.49728532632191974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,2.0908800760904946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,64,128,1,fp8,fp8,0,0.4639519850413005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,2.0947200457255044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,1.9426506360371907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,2.094389279683431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,2.102575937906901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,1.9356907208760579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,float16,0,1.7463946342468262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,fp8,0,1.7597653071085613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,64,128,1,fp8,fp8,0,1.6031413078308105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,float16,0,1.759552001953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,fp8,0,1.7757546106974285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,64,128,1,fp8,fp8,0,1.61738125483195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,float16,0,1.7842826843261719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,8.408650716145834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,fp8,0,1.7998773256937664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,7.776842753092448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,8.437018712361654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,64,128,1,fp8,fp8,0,1.6510772705078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,8.424970626831055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,float16,0,1.030186653137207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,8.450906753540039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,7.802357355753581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,fp8,0,1.0520213445027669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,64,128,1,fp8,fp8,0,0.9819200038909912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,8.474933624267578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,float16,0,0.9191146691640218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,fp8,0,0.9269066651662191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,4.443205197652181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,4.471280097961426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,64,128,1,fp8,fp8,0,0.8463892936706543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,4.119183858235677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,7.822869618733724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,8.498506546020508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,float16,0,0.9234613577524821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,4.294261296590169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,fp8,0,0.9325066407521566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,64,128,1,fp8,fp8,0,0.8546293576558431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,float16,0,0.9352320035298666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,4.333189328511556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,3.9745651880900064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,fp8,0,0.9453120231628418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,4.313199996948242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,64,128,1,fp8,fp8,0,0.8687679767608643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,float16,0,0.5653973420461019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,3.978970527648926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,fp8,0,0.5793759822845459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,64,128,1,fp8,fp8,0,0.5475733280181885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,4.309141476949056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,4.329114596048991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,float16,0,0.5134239991505941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,2.3197919527689614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,3.997018814086914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,fp8,0,0.5168319940567017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,4.331301371256511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,64,128,1,fp8,fp8,0,0.480128010114034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,2.3308800061543784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,2.1519039471944175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,2.243237336476644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,float16,0,0.5168106555938721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,fp8,0,0.5204746723175049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,64,128,1,fp8,fp8,0,0.4824106693267822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,2.254864056905111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,float16,0,0.5228319962819418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,2.0854667027791343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,fp8,0,0.5271626710891724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,2.2620320320129395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,64,128,1,fp8,fp8,0,0.48959465821584064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,2.0849973360697427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,2.254144032796224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,float16,0,0.3740533192952474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,fp8,0,0.3769599994023641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,2.2630720138549805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,64,128,1,fp8,fp8,0,0.3532480001449585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,1.2938026587168376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,float16,0,0.37518401940663654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,2.095424016316732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,2.269920031229655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,fp8,0,0.3734026749928792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,1.2955466906229656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,64,128,1,fp8,fp8,0,0.3489439884821574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,1.19922669728597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,1.2881866296132405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,float16,0,0.37455999851226807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,fp8,0,0.37385066350301105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,64,128,1,fp8,fp8,0,0.351088007291158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,1.2908906936645508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,1.285871982574463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,1.195093313852946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,float16,0,0.37409599622090656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,fp8,0,0.37485333283742267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,1.2936320304870605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,1.1955466270446777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,64,128,1,fp8,fp8,0,0.3520960013071696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,1.2893386681874592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,1.289632002512614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,1.1957653363545735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,float16,0,1.4520692825317383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,fp8,0,1.4650506973266602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,64,128,1,fp8,fp8,0,1.3318400382995605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,float16,0,1.4604585965474446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,fp8,0,1.4747412999471028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,64,128,1,fp8,fp8,0,1.3419626553853352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,float16,0,1.4819572766621907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,6.062538782755534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,5.5948638916015625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,6.077648162841797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,fp8,0,1.4970879554748535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,6.075765609741211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,64,128,1,fp8,fp8,0,1.367850621541341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,6.093162536621094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,float16,0,0.8605120182037354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,5.60153071085612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,fp8,0,0.8781226476033529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,64,128,1,fp8,fp8,0,0.8199893633524576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,6.102783838907878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,float16,0,0.7682826519012451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,3.2262932459513345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,fp8,0,0.7750773429870605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,64,128,1,fp8,fp8,0,0.7095839977264404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,3.2475147247314453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,2.9849440256754556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,5.641743977864583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,float16,0,0.7741440137227377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,6.115685145060222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,3.1091521581014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,fp8,0,0.780789295832316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,64,128,1,fp8,fp8,0,0.7171359856923422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,2.872629483540853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,3.1157760620117188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,float16,0,0.7828053633371989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,3.1183465321858725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,fp8,0,0.7933386961619059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,64,128,1,fp8,fp8,0,0.7261973222096761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,3.117568016052246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,2.883877436319987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,float16,0,0.47556265195210773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,fp8,0,0.4862133264541626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,3.135573387145996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,64,128,1,fp8,fp8,0,0.4593813419342041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,1.6953493754069011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,float16,0,0.4309226671854655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,3.1343892415364585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,1.7061546643575032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,2.895573298136393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,1.5751412709554036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,fp8,0,0.4349120060602824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,64,128,1,fp8,fp8,0,0.40245866775512695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,1.6396533648173015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,float16,0,0.43488534291585285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,fp8,0,0.4371093511581421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,64,128,1,fp8,fp8,0,0.406277338663737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,1.6381440162658691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,1.5188053448994954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,float16,0,0.43824533621470135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,1.6379893620808919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,fp8,0,0.4445919990539551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,64,128,1,fp8,fp8,0,0.41230932871500653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,1.6474720637003581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,1.5264159838358562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,float16,0,0.31755733489990234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,fp8,0,0.31618134180704754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,1.650496006011963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,64,128,1,fp8,fp8,0,0.2956426739692688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,1.653450647989909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,0.9642293453216553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,1.5290026664733887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,0.8891572952270508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,float16,0,0.31403199831644696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,0.967141310373942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,fp8,0,0.31307733058929443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,64,128,1,fp8,fp8,0,0.2937013308207194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,0.9584480126698812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,float16,0,0.3145013252894084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,0.9552373091379801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,fp8,0,0.31483733654022217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,0.889461358388265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,64,128,1,fp8,fp8,0,0.29375465710957843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,0.9547253449757894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,fp8,0,0.3141333262125651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,float16,0,0.3160159985224406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,0.9613973299662272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,0.8914453188578287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,64,128,1,fp8,fp8,0,0.2953813274701436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,0.9578186670939127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,0.9619519710540771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,0.8875626722971598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,float16,0,2.2957280476888022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,64,128,1,fp8,fp8,0,2.102992057800293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,fp8,0,2.311194737752279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,float16,0,2.309648036956787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,fp8,0,2.3274505933125815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,64,128,1,fp8,fp8,0,2.1223732630411782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,float16,0,2.3493067423502603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,8.079930623372396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,7.453813552856445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,8.103290557861328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,8.099034627278646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,64,128,1,fp8,fp8,0,2.1608640352884927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,fp8,0,2.368501345316569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,float16,0,1.31112535794576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,7.4830881754557295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,8.121920267740885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,fp8,0,1.3411733309427898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,64,128,1,fp8,fp8,0,1.246677319208781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,8.172527949015299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,float16,0,1.1621973514556885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,4.286490758260091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,fp8,0,1.1723360220591228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,64,128,1,fp8,fp8,0,1.0652693112691243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,3.956197420756022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,4.311573346455892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,7.524346669514974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,8.175274531046549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,4.078714688618978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,float16,0,1.1708587010701497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,fp8,0,1.1820586522420247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,64,128,1,fp8,fp8,0,1.0743733247121174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,float16,0,1.1857759952545166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,3.774165471394857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,fp8,0,1.1993280251820881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,4.105962753295898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,4.095882733662923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,64,128,1,fp8,fp8,0,1.0950453281402588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,float16,0,0.6885493596394857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,4.104885419209798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,3.774266560872396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,fp8,0,0.7061066627502441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,4.125098546346028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,64,128,1,fp8,fp8,0,0.6583253145217896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,2.198176066080729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,float16,0,0.6165493329366049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,3.8008639017740884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,2.2117813428243003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,4.140581448872884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,fp8,0,0.6291786829630533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,2.043274720509847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,64,128,1,fp8,fp8,0,0.5718400080998739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,2.105466683705648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,float16,0,0.621338685353597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,fp8,0,0.6274133523305258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,64,128,1,fp8,fp8,0,0.5765813191731771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,2.1134559313456216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,1.9470240275065105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,float16,0,0.6293973525365194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,2.113877296447754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,fp8,0,0.6360853513081869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,64,128,1,fp8,fp8,0,0.5845226844151815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,2.118746598561605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,1.9589653015136719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,float16,0,0.3850560188293457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,2.128058592478434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,fp8,0,0.39427733421325684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,64,128,1,fp8,fp8,0,0.3715360164642334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,1.1674506664276123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,float16,0,0.34755198160807294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,2.1360960006713867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,1.9630026817321777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,fp8,0,0.3511093457539876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,1.1780266761779785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,1.0875519911448162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,64,128,1,fp8,fp8,0,0.3264266649881999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,1.1197386582692463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,float16,0,0.35096534093221027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,1.0426560242970784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,fp8,0,0.35364266236623126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,1.1251413027445476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,64,128,1,fp8,fp8,0,0.3282453417778015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,1.125162680943807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,float16,0,0.3547626733779907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,1.1299786567687988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,fp8,0,0.3596160014470418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,1.0434186458587646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,64,128,1,fp8,fp8,0,0.33397332827250165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,1.1339680353800456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,fp8,0,0.2576853235562642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,float16,0,0.25887999931971234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,1.135690689086914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,64,128,1,fp8,fp8,0,0.24043200413386026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,0.6799306869506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,1.0495839913686116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,float16,0,0.2545173366864522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,0.6754986445109049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,0.6792639891306559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,0.6271680196126302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,fp8,0,0.2545119921366374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,64,128,1,fp8,fp8,0,0.23868266741434732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,0.6722133159637451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,float16,0,0.2545386751492818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,0.6241226593653361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,fp8,0,0.25470399856567383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,0.6727733612060547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,0.6732532978057861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,64,128,1,fp8,fp8,0,0.24010133743286133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,64,128,1,fp8,fp8,0,0.24250666300455728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,float16,0,0.25437333186467487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,0.6245546738306681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,fp8,0,0.2555413246154785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,0.6746506690979004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,0.6745440165201823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,0.6281813383102417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,float16,0,1.6999732653299968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,64,128,1,fp8,fp8,0,1.5524533589680989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,fp8,0,1.7137120564778645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,float16,0,1.7079680760701497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,fp8,0,1.725861390431722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,64,128,1,fp8,fp8,0,1.568160057067871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,4.888693491617839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,4.511210759480794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,4.8939361572265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,float16,0,1.7368480364481609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,4.90283743540446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,fp8,0,1.751541296641032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,64,128,1,fp8,fp8,0,1.6037866274515789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,4.92146650950114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,float16,0,0.9853973388671875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,4.518917401631673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,fp8,0,1.0078933238983154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,64,128,1,fp8,fp8,0,0.936240037282308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,4.936117490132649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,2.6256319681803384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,float16,0,0.8742079734802246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,2.4329919815063477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,2.644949277242025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,fp8,0,0.8804639975229899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,4.952874819437663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,4.56218147277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,64,128,1,fp8,fp8,0,0.8018240133921305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,2.490410645802816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,float16,0,0.8798666795094808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,fp8,0,0.8894506295522054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,64,128,1,fp8,fp8,0,0.8094507058461508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,2.5000747044881186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,2.2963306109110513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,float16,0,0.8930133183797201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,2.497157255808512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,fp8,0,0.9007946650187174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,2.5083200136820474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,64,128,1,fp8,fp8,0,0.8222453594207764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,2.3042027155558267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,float16,0,0.5204799969991049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,2.518474737803141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,fp8,0,0.5316586494445801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,64,128,1,fp8,fp8,0,0.49806400140126544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,1.3611733118693035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,float16,0,0.4644373257954915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,2.528794606526693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,2.3184000651041665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,1.376479943593343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,fp8,0,0.4695626497268677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,1.2688266436258953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,64,128,1,fp8,fp8,0,0.4308053255081177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,1.2944800059000652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,float16,0,0.469376007715861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,fp8,0,0.4737066825230916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,64,128,1,fp8,fp8,0,0.43623467286427814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,1.300826629002889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,1.2000053723653157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,1.298197348912557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,float16,0,0.47380268573760986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,fp8,0,0.4793013334274292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,64,128,1,fp8,fp8,0,0.44172267119089764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,1.304421345392863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,float16,0,0.2901279926300049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,1.202730655670166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,1.309557358423869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,64,128,1,fp8,fp8,0,0.2818666696548462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,fp8,0,0.2977280020713806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,1.31277863184611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,0.73471466700236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,1.2093653678894043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,float16,0,0.25911466280619305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,0.742677370707194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,0.6867360273996989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,fp8,0,0.2608106732368469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,float16,0,0.2608319918314616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,0.6945813496907552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,64,128,1,fp8,fp8,0,0.24662399291992188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,64,128,1,fp8,fp8,0,0.2486720085144043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,0.7007413705190023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,0.6501919825871786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,fp8,0,0.26472532749176025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,0.7025226751963297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,0.7019519805908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,float16,0,0.26717867453893024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,0.6526666482289633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,fp8,0,0.2707680066426595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,float16,0,0.1992853283882141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,64,128,1,fp8,fp8,0,0.2527466615041097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,0.7088800271352133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,0.711359977722168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,0.657696008682251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,fp8,0,0.1988746722539266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,0.44011199474334717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,64,128,1,fp8,fp8,0,0.18754667043685913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,0.4408213297526042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,0.40675731499989826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,float16,0,0.19536532958348593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,0.4367733399073283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,fp8,0,0.19522666931152344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,64,128,1,fp8,fp8,0,0.18313600619633993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,fp8,0,0.19549866517384848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,0.4347413380940755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,float16,0,0.19405333201090494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,0.40622933705647785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,0.43411731719970703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,64,128,1,fp8,fp8,0,0.18470933039983115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,0.43884265422821045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,0.4042826493581136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,float16,0,0.1967946688334147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,0.4347093502680461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,0.40704532464345294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,0.4352533419926961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,fp8,0,0.1946720083554586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,64,128,1,fp8,fp8,0,0.18541866540908813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,float16,0,2.261242707570394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,64,128,1,fp8,fp8,0,2.065114657084147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,fp8,0,2.276495933532715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,float16,0,2.2776907285054526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,fp8,0,2.2933600743611655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,4.927509307861328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,64,128,1,fp8,fp8,0,2.0844799677530923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,4.543397267659505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,4.946821212768555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,4.957418759663899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,float16,0,2.3151466051737466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,4.974202791849772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,fp8,0,2.3297972679138184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,64,128,1,fp8,fp8,0,2.129514694213867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,4.5729068120320635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,float16,0,1.2821333408355713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,fp8,0,1.305183966954549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,64,128,1,fp8,fp8,0,1.214576005935669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,4.995759963989258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,5.016602516174316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,2.650544007619222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,float16,0,1.130341370900472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,fp8,0,1.1386720339457195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,4.614010810852051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,64,128,1,fp8,fp8,0,1.0306986967722576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,2.680490811665853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,2.4677173296610513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,2.475482622782389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,float16,0,1.136794646581014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,fp8,0,1.1457599798838298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,64,128,1,fp8,fp8,0,1.040826638539632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,2.4926400184631348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,2.2871146202087402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,2.49073060353597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,float16,0,1.1524159908294678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,2.2917280197143555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,fp8,0,1.1657226880391438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,2.4973546663920083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,64,128,1,fp8,fp8,0,1.0625066757202148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,float16,0,0.6601813236872355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,fp8,0,0.6753066380818685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,2.5086827278137207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,64,128,1,fp8,fp8,0,0.6281706492106119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,1.3600586255391438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,2.5234293937683105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,2.316703955332438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,float16,0,0.5869013468424479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,1.2688533465067546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,1.3767679532368977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,fp8,0,0.5917066733042399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,64,128,1,fp8,fp8,0,0.5396693150202433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,1.2749919891357422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,float16,0,0.5922293265660604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,1.2808213233947754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,fp8,0,0.5966399908065796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,1.180506706237793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,64,128,1,fp8,fp8,0,0.5452106793721517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,1.2813599904378254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,float16,0,0.6001173257827759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,1.2871039708455403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,fp8,0,0.6071466604868571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,1.1831893126169841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,64,128,1,fp8,fp8,0,0.5531680186589559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,float16,0,0.354751984278361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,1.2919946511586506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,0.7164426644643148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,fp8,0,0.36298668384552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,1.192522684733073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,1.2979893684387207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,64,128,1,fp8,fp8,0,0.3385386864344279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,0.7257973353068033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,float16,0,0.3139786720275879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,0.6732532978057861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,fp8,0,0.318015992641449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,0.6262506643931071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,0.6720106601715088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,64,128,1,fp8,fp8,0,0.2937813401222229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,0.6746133168538412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,float16,0,0.3176533381144206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,fp8,0,0.32019199927647907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,0.6775519847869873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,64,128,1,fp8,fp8,0,0.29637332757314044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,fp8,0,0.3261866569519043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,0.678442637125651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,float16,0,0.3224426706631978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,0.6281546751658121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,0.6808319886525472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,64,128,1,fp8,fp8,0,0.3017973303794861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,float16,0,0.20017067591349283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,0.6867360273996989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,0.6353866656621298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,fp8,0,0.20363199710845947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,0.3961333433787028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,64,128,1,fp8,fp8,0,0.19369065761566162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,0.4018079837163289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,float16,0,0.17549866437911987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,0.3734133243560791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,0.3672373294830322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,fp8,0,0.17681066195170084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,64,128,1,fp8,fp8,0,0.16659733653068542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,0.37105600039164227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,0.3468639850616455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,float16,0,0.17735999822616577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,fp8,0,0.1790026624997457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,0.3717600107192993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,64,128,1,fp8,fp8,0,0.17038933436075845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,0.37335999806722003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,64,128,1,fp8,fp8,0,0.1747786601384481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,0.3490133285522461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,float16,0,0.1805973251660665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,fp8,0,0.18343466520309448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,0.3755306800206502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,0.3781546751658122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,float16,0,0.137855996688207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,0.3540159861246745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.24838932355244955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,fp8,0,0.13981866836547852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,64,128,1,fp8,fp8,0,0.13160000244776407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,fp8,0,0.1355946660041809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.24660267432530722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.2318506638209025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,float16,0,0.13577066858609518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.22808533906936646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.24665067593256632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,64,128,1,fp8,fp8,0,0.12955733140309653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.24601600567499796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,float16,0,0.13532800475756326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.24635199705759683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,fp8,0,0.1360319952170054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,64,128,1,fp8,fp8,0,0.1279146671295166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.2469386657079061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.2304906646410624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,float16,0,0.13597866892814636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.24450133244196573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.23002133766810098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,fp8,0,0.1360053320725759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,64,128,1,fp8,fp8,0,0.12748799721399942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.2458346684773763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,float16,0,1.6713439623514812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,64,128,1,fp8,fp8,0,1.5293173789978027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,fp8,0,1.683695952097575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,float16,0,1.6845332781473796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,3.089109420776367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,2.856272061665853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,3.1075414021809897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,fp8,0,1.697098731994629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,64,128,1,fp8,fp8,0,1.5433813730875652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,3.113210678100586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,float16,0,1.7098080317179363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,3.119488080342611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,fp8,0,1.7264053026835124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,2.8644587198893228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,64,128,1,fp8,fp8,0,1.5755359331766765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,float16,0,0.963269313176473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,3.143930753072103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,fp8,0,0.9813439846038818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,64,128,1,fp8,fp8,0,0.9103413422902426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,2.9046452840169272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,3.155797322591146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,1.6966346104939778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,float16,0,0.8502506415049235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,1.7161226272583008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,1.5834719340006511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,fp8,0,0.8574986457824707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,64,128,1,fp8,fp8,0,0.7774933179219564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,1.570090611775716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,float16,0,0.8583679993947347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,1.5767146746317546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,1.4475040435791016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,64,128,1,fp8,fp8,0,0.7866186300913492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,fp8,0,0.8652479648590088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,1.5793654123942058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,float16,0,0.8695786794026693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,1.5893920262654622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,64,128,1,fp8,fp8,0,0.7985173066457113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,1.4555733998616536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,fp8,0,0.8772266705830892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,1.5931359926859539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,float16,0,0.499722679456075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,fp8,0,0.510586659113566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,1.6009492874145508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,64,128,1,fp8,fp8,0,0.4755466779073079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,0.8767253557840983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,0.8866133689880371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,1.4698932965596516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,fp8,0,0.4459199905395508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,float16,0,0.4426666498184204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,0.82204802831014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,0.8113866647084554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,64,128,1,fp8,fp8,0,0.41046400864919025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,0.8171253204345703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,float16,0,0.44726399580637616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,0.7528106371561686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,fp8,0,0.4514293273289998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,0.8161760171254476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,64,128,1,fp8,fp8,0,0.4145013491312663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,0.8216479619344076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,float16,0,0.4538026650746663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,0.758682648340861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,fp8,0,0.4572426478068034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,0.824938694636027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,64,128,1,fp8,fp8,0,0.4203253189722697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,float16,0,0.26808534065882367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,0.8297279675801595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,0.7660906314849854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,0.46765867869059247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,float16,0,0.23508266607920328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,fp8,0,0.2752266724904378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,64,128,1,fp8,fp8,0,0.2571893334388733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,0.47485868136088055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,0.44114665190378827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,0.4329599936803182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,fp8,0,0.23745065927505493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,0.43089600404103595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,64,128,1,fp8,fp8,0,0.22401599089304605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,0.4049546718597412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,float16,0,0.23704000314076742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,fp8,0,0.2395520011583964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,0.4357706705729167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,0.4083840052286784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,64,128,1,fp8,fp8,0,0.22580800453821817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,0.43532268206278485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,float16,0,0.24237332741419473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,fp8,0,0.24510933955510458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,0.440341313680013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,64,128,1,fp8,fp8,0,0.22906132539113364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,0.4440853198369344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,float16,0,0.15426666537920633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,0.4124693473180135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.26286933819452923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,fp8,0,0.15770666797955832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,64,128,1,fp8,fp8,0,0.15037866433461508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.2664266626040141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,0.24971733490626016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,float16,0,0.13570666313171387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.24238399664560953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,fp8,0,0.13594667116800943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,64,128,1,fp8,fp8,0,0.12754133343696594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.24201599756876627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.22550400098164877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,float16,0,0.13781866431236267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.2428426742553711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,fp8,0,0.1367680033047994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,64,128,1,fp8,fp8,0,0.12840533256530762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.24435732762018839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,64,128,1,fp8,fp8,0,0.1334826648235321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.22752533356348673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.24889065821965536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.23213332891464233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,float16,0,0.13778666655222574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.24659732977549234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,fp8,0,0.14020267128944397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,float16,0,0.1070240040620168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.15983999768892923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,fp8,0,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.17064533631006876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.1696853240331014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,64,128,1,fp8,fp8,0,0.10259733597437541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.17002665996551514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,float16,0,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,fp8,0,0.1070186694463094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,64,128,1,fp8,fp8,0,0.10086400310198466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.170799990495046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.15851199626922607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,float16,0,0.10641599694887798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.1707786719004313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,fp8,0,0.10524800419807434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,64,128,1,fp8,fp8,0,0.10009599725405376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.1704746683438619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.15868799885114035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,float16,0,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.17058134078979492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,fp8,0,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,64,128,1,fp8,fp8,0,0.10101866722106934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.16842132806777954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.15848533312479654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,float16,0,2.2629547119140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,64,128,1,fp8,fp8,0,2.0212106704711914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,fp8,0,2.2594614028930664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,float16,0,2.282602628072103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,3.3661438624064126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,3.06276798248291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,3.362837473551432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,3.4032106399536133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,fp8,0,2.289269288380941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,64,128,1,fp8,fp8,0,2.042778650919596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,float16,0,2.3074560165405273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,3.389904022216797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,fp8,0,2.3111626307169595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,3.0759948094685874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,64,128,1,fp8,fp8,0,2.0925280253092446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,3.4225918451944985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,float16,0,1.2716960112253826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,fp8,0,1.2850826581319172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,64,128,1,fp8,fp8,0,1.1983946959177654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,3.1243200302124023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,3.4291518529256186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,1.8468373616536458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,1.864799976348877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,float16,0,1.1139146486918132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,1.7276585896809895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,fp8,0,1.1223573684692383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,64,128,1,fp8,fp8,0,1.0143307050069172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,1.6679199536641438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,float16,0,1.1232159932454426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,1.6812480290730794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,1.5389866828918457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,fp8,0,1.13099201520284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,64,128,1,fp8,fp8,0,1.0238453547159831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,1.6838879585266113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,1.5481972694396973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,1.6892906824747722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,64,128,1,fp8,fp8,0,1.0459360281626384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,float16,0,1.1399253209431965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,1.701024055480957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,fp8,0,1.1513813336690266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,1.7110506693522136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,float16,0,0.6471360127131144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,fp8,0,0.6585546731948853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,0.9410879611968994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,64,128,1,fp8,fp8,0,0.6126186847686768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,1.5694773991902669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,0.9540479977925619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,float16,0,0.5694719950358073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,0.8825279871622721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,fp8,0,0.5758080085118612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,0.856719970703125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,64,128,1,fp8,fp8,0,0.5235093434651693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,float16,0,0.5756213267644247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,0.8612586657206217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,0.7925386428833008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,fp8,0,0.5820159912109375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,0.8610453605651855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,64,128,1,fp8,fp8,0,0.5284639994303385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,float16,0,0.5843146642049154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,0.8676106929779053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,0.7966026465098063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,fp8,0,0.5910826524098715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,0.8725759983062744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,64,128,1,fp8,fp8,0,0.5395413239796957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,float16,0,0.3395093282063802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,0.4915306568145752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,0.8777600129445394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,0.8050293127695719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,fp8,0,0.3471839825312297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,64,128,1,fp8,fp8,0,0.32204800844192505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,0.49986668427785236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,64,128,1,fp8,fp8,0,0.2773226698239644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,0.4634186824162801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,float16,0,0.29768532514572144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,0.41818666458129883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,float16,0,0.30133867263793945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,0.4455519914627075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,64,128,1,fp8,fp8,0,0.2807253400484721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,fp8,0,0.30114134152730304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,0.420687993367513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,float16,0,0.3077440063158671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,0.45054932435353595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,0.45030399163564044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,fp8,0,0.3040213386217753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,0.4540533224741618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,0.45845866203308105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,0.42522132396698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,float16,0,0.18387732903162637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,0.45711998144785565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,fp8,0,0.31113600730895996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,64,128,1,fp8,fp8,0,0.2865973313649495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.2657439907391866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,fp8,0,0.1888320048650106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,64,128,1,fp8,fp8,0,0.17710934082667032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.2712533275286357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,0.252837340037028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.24076799551645914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,float16,0,0.15964266657829285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.23826134204864502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,fp8,0,0.1604106624921163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,64,128,1,fp8,fp8,0,0.15035200119018555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.22600533564885458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,float16,0,0.16044800480206808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.23024000724156699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.24055999517440796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,fp8,0,0.1614773372809092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,64,128,1,fp8,fp8,0,0.15414399902025858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.24273600180943808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,float16,0,0.16260266304016113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.24395734071731567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,fp8,0,0.16595733165740967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,64,128,1,fp8,fp8,0,0.1586079994837443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.24633600314458212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.23408534129460654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,float16,0,0.10697600245475769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.15409599741299948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,fp8,0,0.10936533411343892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,64,128,1,fp8,fp8,0,0.10703999797503154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.15635733803113303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.1490293343861898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,float16,0,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.14249066511789957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,fp8,0,0.09682133793830872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,64,128,1,fp8,fp8,0,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,64,128,1,fp8,fp8,0,0.09057600299517314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.1316159963607788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.13353600104649863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,float16,0,0.09674132863680522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.14266666769981384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.14342400431632996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,float16,0,0.09700799981753032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.14385599891344705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,fp8,0,0.09890666604042053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,fp8,0,0.07733333110809326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,64,128,1,fp8,fp8,0,0.09178666273752849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.145087997118632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.10586667060852051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.1336373289426168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,float16,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.10514133175214131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,64,128,1,fp8,fp8,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,float16,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.10498133301734924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,float16,0,0.07632533212502797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,fp8,0,0.07626666625340779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,64,128,1,fp8,fp8,0,0.072202667593956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.10496000448862712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.09805867075920105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.10566932956377666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,fp8,0,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,64,128,1,fp8,fp8,0,0.07182933390140533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.10515200098355611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.09869333108266194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,float16,0,0.07700799902280171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.10525332887967427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,fp8,0,0.07720533510049184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,64,128,1,fp8,fp8,0,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.10502933462460835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.09875733653704326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,float16,0,1.6649707158406575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,fp8,0,1.6769173940022786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,64,128,1,fp8,fp8,0,1.5116106669108074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,2.200928052266439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,float16,0,1.6809919675191243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,2.024090607961019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,2.220991929372152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,2.22108793258667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,fp8,0,1.694826602935791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,64,128,1,fp8,fp8,0,1.5324266751607258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,2.040250619252523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,float16,0,1.70797332127889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,2.2340265909830728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,fp8,0,1.7181119918823242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,64,128,1,fp8,fp8,0,1.5567413965861003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,2.2523199717203775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,float16,0,0.951136032740275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,2.2676746050516763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,2.0746453603108725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,fp8,0,0.9665546417236328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,1.2397066752115886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,64,128,1,fp8,fp8,0,0.8994719982147217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,1.2530933221181233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,1.1625813643137615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,float16,0,0.836079994837443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,64,128,1,fp8,fp8,0,0.764847993850708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,fp8,0,0.8432586987813314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,1.1134986877441406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,1.1203893025716145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,1.0246293544769287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,float16,0,0.844101349512736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,fp8,0,0.8508640130360922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,64,128,1,fp8,fp8,0,0.7712746461232504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,1.1190933386484783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,1.1264746983846028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,float16,0,0.8555893103281657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,1.035200039545695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,fp8,0,0.8622666994730631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,1.1327892939249675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,1.1390293439229329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,64,128,1,fp8,fp8,0,0.7849813302357992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,0.6359359820683798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,float16,0,0.48846399784088135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,1.0452373027801514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,fp8,0,0.4992106755574544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,64,128,1,fp8,fp8,0,0.4631733496983846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,0.645365317662557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,0.571999986966451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,0.5985759894053141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,float16,0,0.4291040102640788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,fp8,0,0.43431464831034344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,64,128,1,fp8,fp8,0,0.3976159890492757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,0.5764000018437704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,0.5303839842478434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,float16,0,0.43556265036265057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,0.5767146746317545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,fp8,0,0.43904534975687665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,64,128,1,fp8,fp8,0,0.4004053274790446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,0.5799839893976847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,0.5353066523869833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,float16,0,0.44119465351104736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,0.5818026860555013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,fp8,0,0.44676800568898517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,64,128,1,fp8,fp8,0,0.40798401832580566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,0.5879253149032593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,float16,0,0.2571893334388733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,0.5411786635716757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,0.33658134937286377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,fp8,0,0.2627626657485962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,64,128,1,fp8,fp8,0,0.24661866823832193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,0.3431253433227539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,0.31801066795984906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,float16,0,0.22401599089304605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.2980639934539795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,fp8,0,0.22611733277638754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,64,128,1,fp8,fp8,0,0.21178134282430014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.3004800081253052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,0.28169065713882446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,float16,0,0.2274399995803833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.3004639943440755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,fp8,0,0.22789333264033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,float16,0,0.23194666703542074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,64,128,1,fp8,fp8,0,0.21526400248209634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.30395734310150146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,0.28521066904067993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.3062506715456645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,fp8,0,0.23377066850662231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,64,128,1,fp8,fp8,0,0.2197279930114746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.30948267380396527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,0.2895946701367696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,float16,0,0.14170133074124655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.18492267529169717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,fp8,0,0.14436266819636026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,64,128,1,fp8,fp8,0,0.13772799571355185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.18914133310317993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.17661333084106445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.16431466738382974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,float16,0,0.12129599849383037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.16394666830698648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,fp8,0,0.1239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,64,128,1,fp8,fp8,0,0.11338133613268535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.1520746648311615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,float16,0,0.12266666690508525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.16445866227149963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,fp8,0,0.12401066223780315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,64,128,1,fp8,fp8,0,0.11411733428637187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.16691199938456217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.15431466698646545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,float16,0,0.12541332840919495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,float16,0,0.08291199803352356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.16660799582799277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,fp8,0,0.12760532895723978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,64,128,1,fp8,fp8,0,0.11931733290354411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.1692053278287252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.15854400396347046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.10571199655532837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,fp8,0,0.08432533343633015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,64,128,1,fp8,fp8,0,0.08328000207742055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.10930666327476501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.10390399893124898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,float16,0,0.07625600198904674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.10104533036549886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,fp8,0,0.07647466659545898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,64,128,1,fp8,fp8,0,0.07111466427644093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.10090133547782898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.09379200140635173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,float16,0,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.09942400455474854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,64,128,1,fp8,fp8,0,0.0721973329782486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.10130133231480916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.09514666597048442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,float16,0,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.1009173293908437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,fp8,0,0.07830933233102162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,64,128,1,fp8,fp8,0,0.07363200187683105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.10292800267537434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.09618666768074036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,float16,0,0.060271998246510826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.07844266792138417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,fp8,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,64,128,1,fp8,fp8,0,0.056234667698542275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.07844266792138417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,float16,0,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.077674667040507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,64,128,1,fp8,fp8,0,0.05645333230495453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.07858666777610779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.07832000156243642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.07506666580835979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,float16,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.07844266792138417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,64,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.07258133093516032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,float16,0,0.060122668743133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.07834666470686595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,fp8,0,0.05995733539263407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,64,128,1,fp8,fp8,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.07703466713428497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.07424533367156982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,float16,0,1.9705653190612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,fp8,0,1.9701919555664062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,64,128,1,fp8,fp8,0,1.8501866658528645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,2.308128039042155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,2.308677355448405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,float16,0,1.982378641764323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,2.1857706705729165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,2.321733315785726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,fp8,0,1.9730346997578938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,64,128,1,fp8,fp8,0,1.8743573824564617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,2.3168586095174155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,2.2197066942850747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,float16,0,2.0586613019307456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,fp8,0,2.026869297027588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,64,128,1,fp8,fp8,0,1.9624427159627278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,2.418053309122721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,float16,0,1.081210692723592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,1.2688000202178955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,2.3734347025553384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,fp8,0,1.0605546633402507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,64,128,1,fp8,fp8,0,1.0446773370107014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,1.2491412957509358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,2.3034079869588218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,1.2178080081939697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,float16,0,0.9961653550465902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,1.1662399768829346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,fp8,0,0.9935146967569987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,64,128,1,fp8,fp8,0,0.9309333165486654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,1.1700373490651448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,1.1011679967244465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,float16,0,0.9973546663920084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,1.174613316853841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,fp8,0,0.9961067040761312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,64,128,1,fp8,fp8,0,0.9408853054046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,1.1695573329925537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,1.1128586928049724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,float16,0,1.005184014638265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,1.179696003595988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,fp8,0,1.001477320988973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,64,128,1,fp8,fp8,0,0.9652853012084961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,float16,0,0.5521386861801147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,1.179418643315633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,0.6462453206380209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,1.1312639713287354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,fp8,0,0.5400373140970866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,64,128,1,fp8,fp8,0,0.5299199819564819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,0.6340266863505045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,0.6190880139668783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,float16,0,0.5063360134760538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,0.5935573180516561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,fp8,0,0.5049120187759399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,0.5943253437678019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,64,128,1,fp8,fp8,0,0.4730986754099528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,0.5604533354441324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,float16,0,0.5092639923095703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,0.5977760155995687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,fp8,0,0.5081013441085815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,64,128,1,fp8,fp8,0,0.478928009668986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,0.5984319845835367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,0.5675040086110433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,float16,0,0.5112799803415934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,0.6009493271509806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,0.6004000107447306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,fp8,0,0.5108373165130615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,64,128,1,fp8,fp8,0,0.4843573172887166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,float16,0,0.2845439910888672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,0.5697439908981323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,0.3349279959996541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,fp8,0,0.2797973354657491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,64,128,1,fp8,fp8,0,0.27526400486628216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,0.32943467299143475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,0.3217759927113851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,float16,0,0.2602986693382263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.3076266646385193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,fp8,0,0.26076799631118774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,64,128,1,fp8,fp8,0,0.24658133586247763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.3081973393758138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,64,128,1,fp8,fp8,0,0.2497119903564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,0.291706661383311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,float16,0,0.26265599330266315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.3112000028292338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,fp8,0,0.2627253333727519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.3108746608098348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,0.29706666866938275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,float16,0,0.2658613324165344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,0.3124106725056966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.17965867122014365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,fp8,0,0.26502400636672974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,fp8,0,0.1509066621462504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,64,128,1,fp8,fp8,0,0.2531893253326416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,0.31275200843811035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,0.29812800884246826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,float16,0,0.1543839971224467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,64,128,1,fp8,fp8,0,0.15060800313949585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.17783466974894205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.17473600308100382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,float16,0,0.1377226710319519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.1639946699142456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,fp8,0,0.13797866304715475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,64,128,1,fp8,fp8,0,0.13158399860064188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.16356266538302103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.15710933009783426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,float16,0,0.13884266217549643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.16408000389734903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.16670932372411093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,fp8,0,0.13970133662223816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,64,128,1,fp8,fp8,0,0.13361600041389465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.16397333145141602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.15758400162061056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,float16,0,0.1420799990495046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,fp8,0,0.14178666472434998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,64,128,1,fp8,fp8,0,0.13548266887664795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.16619732975959778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.16083733240763345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,float16,0,0.08422399560610454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.09784000118573506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,fp8,0,0.08307200173536937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,64,128,1,fp8,fp8,0,0.0865760048230489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.09774399797121684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.09904000163078308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,float16,0,0.07783466577529907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.09246933460235596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.09150399764378865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,64,128,1,fp8,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.09292266766230266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.08617066343625386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,float16,0,0.07835199932257335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,fp8,0,0.07864533364772797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,64,128,1,fp8,fp8,0,0.07318933308124542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.09145599603652954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,float16,0,0.07828266421953838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.09224533041318257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,64,128,1,fp8,fp8,0,0.07423466444015503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.09300800164540608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,64,128,1,fp8,fp8,0,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.08686932921409607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.05752533177534739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,float16,0,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.058005332946777344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,fp8,0,0.05016533533732096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,float16,0,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.05646933118502299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,fp8,0,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,fp8,0,0.04900266726811727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,64,128,1,fp8,fp8,0,0.0467199981212616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,float16,0,0.04920533299446106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,64,128,1,fp8,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,64,128,1,fp8,fp8,0,0.04567466676235199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,float16,0,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,float16,0,0.04909333089987437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,fp8,0,0.035029334326585136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.05677866439024607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,fp8,0,0.049135997891426086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.0406986673672994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,64,128,1,fp8,fp8,0,0.035088000198205314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.04043733328580856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,float16,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.04037333279848099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,fp8,0,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,64,128,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,64,128,1,fp8,fp8,0,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,float16,0,0.033930666744709015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,fp8,0,0.03509333233038584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.0413973331451416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,float16,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.040421334405740104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,64,128,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.04104000081618627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.03841600070397059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,float16,0,1.907807985941569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,1.923957347869873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,fp8,0,1.9013226826985676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,64,128,1,fp8,fp8,0,1.843392054239909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,1.9188693364461262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,1.8689014116923015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,float16,0,1.9137226740519206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,1.9327252705891926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,fp8,0,1.9092267354329426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,64,128,1,fp8,fp8,0,1.8558346430460613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,1.923397382100423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,1.8682187398274739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,float16,0,2.0037546157836914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,2.027952035268148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,fp8,0,1.968565305074056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,64,128,1,fp8,fp8,0,1.915338675181071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,float16,0,1.0454346338907878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,1.0647040208180745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,fp8,0,1.027685324350993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,1.9518027305603027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,64,128,1,fp8,fp8,0,1.0222667058308919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,1.9370187123616536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,1.0389760335286458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,1.0451253255208333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,float16,0,0.9590720335642496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,0.9695680141448975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,fp8,0,0.9594613711039225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,64,128,1,fp8,fp8,0,0.9037493069966634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,0.9679893652598063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,0.9186826546986898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,float16,0,0.9665226936340332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,0.9728000164031982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,fp8,0,0.963759978612264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,64,128,1,fp8,fp8,0,0.9136853218078613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,0.9719733397165934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,0.9273599783579508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,float16,0,0.9763360023498535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,0.9839466412862142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,fp8,0,0.96833602587382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,64,128,1,fp8,fp8,0,0.9456426302591959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,float16,0,0.5355733235677084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,0.5443520148595175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,0.9633706410725912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,0.9805493354797363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,fp8,0,0.5226399898529053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,64,128,1,fp8,fp8,0,0.5175413290659586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,0.5308053493499756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,0.5258080164591471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,float16,0,0.4896320104598999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,64,128,1,fp8,fp8,0,0.46058666706085205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,0.4936426480611165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,fp8,0,0.4878773291905721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,0.49292266368865967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,0.46779199441274005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,float16,0,0.4933653275171916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,0.4953226645787557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,0.47122665246327716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,0.49767998854319256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,fp8,0,0.49189865589141846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,64,128,1,fp8,fp8,0,0.4660053253173828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,float16,0,0.4952426751454671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,0.5000960032145182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,float16,0,0.27505600452423096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,fp8,0,0.4944213231404622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,64,128,1,fp8,fp8,0,0.4721333185831706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,0.49781866868336994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,0.47760534286499023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,0.2730986674626668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,0.27937599023183185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,fp8,0,0.2711413304011027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,64,128,1,fp8,fp8,0,0.2688106695810954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,0.27534933884938556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,float16,0,0.2535146673520406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.2556053400039673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,fp8,0,0.25274133682250977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,64,128,1,fp8,fp8,0,0.23917333285013834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.2546186645825704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.24309333165486655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,float16,0,0.2545973261197408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.2577280004819234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,fp8,0,0.2558613419532776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,64,128,1,fp8,fp8,0,0.24396266539891562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.25712533791859943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.2481279969215393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,float16,0,0.2572106719017029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.25946666797002155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,fp8,0,0.2566080093383789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,64,128,1,fp8,fp8,0,0.24385066827138266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.25991467634836835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.14849600195884705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.24768533309300741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,float16,0,0.1495786706606547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.15170666575431824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,fp8,0,0.14622933665911356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,64,128,1,fp8,fp8,0,0.14684266845385233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.14923200011253357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,float16,0,0.1335093379020691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.13489066561063132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,fp8,0,0.13368533054987589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,64,128,1,fp8,fp8,0,0.12771733601888022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.13538133104642233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.12961066762606302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,float16,0,0.1341813306013743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.1371999979019165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,fp8,0,0.13477866848309836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,64,128,1,fp8,fp8,0,0.12980799873669943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,64,128,1,fp8,fp8,0,0.13226133584976196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.1353600025177002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.13156267007191977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,float16,0,0.13612266381581625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.08162666857242584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.138373335202535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,fp8,0,0.13598400354385376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.13769599795341492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.08475200335184734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.13369066516558328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,float16,0,0.08237333099047343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,fp8,0,0.07657599945863088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,fp8,0,0.08024533092975616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,64,128,1,fp8,fp8,0,0.08286400139331818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.0825493335723877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,float16,0,0.07603733241558075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,float16,0,0.07628266513347626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.07626666625340779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,64,128,1,fp8,fp8,0,0.07148266832033794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.07650133470694225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.0708426684141159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,64,128,1,fp8,fp8,0,0.0718399981657664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,64,128,1,fp8,fp8,0,0.07035199801127116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.07653333246707916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.07241599758466084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,float16,0,0.07585600018501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,fp8,0,0.07641066610813141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.0780213326215744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,float16,0,0.04868799944718679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,fp8,0,0.049365331729253135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,64,128,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,float16,0,0.04719999929269155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.047584002216657005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,64,128,1,fp8,fp8,0,0.04456000030040741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.04731200138727824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,64,128,1,fp8,fp8,0,0.044938668608665466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.045253331462542214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,float16,0,0.046469335754712425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.04660800099372864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.04738133152325948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,fp8,0,0.04828799764315287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.044719999035199486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,float16,0,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,64,128,1,fp8,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.048309331138928734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,float16,0,0.034927998979886375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,fp8,0,0.036346666514873505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,64,128,1,fp8,fp8,0,0.035162667433420815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.03472000112136205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,float16,0,0.03499733408292135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.03472533325354258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,fp8,0,0.03396799912055334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,64,128,1,fp8,fp8,0,0.032858667274316154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,64,128,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.03384533276160558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,float16,0,0.03493333359559377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,64,128,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.033999999364217125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.03254399945338567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,float16,0,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,float16,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.02493866781393687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,fp8,0,0.02438933402299881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,64,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,float16,0,0.8867519696553549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,0.8669013182322184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,fp8,0,0.882256031036377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,64,128,1,fp8,fp8,0,0.8321173191070557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,0.866207997004191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,0.816810687383016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,float16,0,0.8911146322886149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,0.8739519913991293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,fp8,0,0.8877493540445963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,64,128,1,fp8,fp8,0,0.841327985127767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,0.8685812950134277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,0.8271893660227457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,float16,0,0.9015359878540039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,0.8852480252583822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,fp8,0,0.8966346581776937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,64,128,1,fp8,fp8,0,0.8821547031402588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,float16,0,0.4967786471048991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,0.8775253295898438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,0.8628479639689127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,0.48853333791097003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,fp8,0,0.4887626568476359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,64,128,1,fp8,fp8,0,0.4856853485107422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,0.4792106548945109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,0.47724799315134686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,fp8,0,0.4482666651407878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,float16,0,0.4481653372446696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,0.4400586684544881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,64,128,1,fp8,fp8,0,0.42367998758951825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,0.4387306769688924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,0.41552531719207764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,float16,0,0.4530346790949504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,0.44281601905822754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,0.44257601102193195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,fp8,0,0.4515519936879476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,64,128,1,fp8,fp8,0,0.42717333634694415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,0.41963199774424237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,float16,0,0.4561760028203328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,0.4472693204879761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,fp8,0,0.4537546634674072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,64,128,1,fp8,fp8,0,0.4347466627756755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,fp8,0,0.25278933842976886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,float16,0,0.25675733884175617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,0.4461439847946167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,0.4264053503672282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,0.25271467367808026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,64,128,1,fp8,fp8,0,0.2544906735420227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,0.24862400690714517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,64,128,1,fp8,fp8,0,0.22078933318456015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,0.2490560015042623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,float16,0,0.2336746652921041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.22872000932693481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,fp8,0,0.2343519926071167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.22913066546122232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.21686400969823202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,float16,0,0.23603200912475586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.23051732778549194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,fp8,0,0.23641065756479898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,64,128,1,fp8,fp8,0,0.2263573408126831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.23015467325846353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.22122132778167725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,float16,0,0.23849066098531088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.2329439918200175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,fp8,0,0.23705067237218222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,64,128,1,fp8,fp8,0,0.22722133000691733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.2323840061823527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.22379199663798013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,float16,0,0.13797866304715475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.13531733552614847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,fp8,0,0.13557866215705872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,64,128,1,fp8,fp8,0,0.13801599542299905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.1332319974899292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.13659733533859253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,float16,0,0.12350400288899739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.12034133076667786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,fp8,0,0.12355732917785645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,64,128,1,fp8,fp8,0,0.11726400256156921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.12076266606648763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.11377066373825073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,float16,0,0.12363732854525249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.12246400117874146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,fp8,0,0.12443733215332031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,64,128,1,fp8,fp8,0,0.11917333801587422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.12139200170834859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.11635200182596843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,float16,0,0.12541332840919495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.12274666627248128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.07516799867153168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,fp8,0,0.12648000319798788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,64,128,1,fp8,fp8,0,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.1237386663754781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.11949333548545837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,float16,0,0.07668266693751018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,fp8,0,0.07561600208282471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,fp8,0,0.07128533224264781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,64,128,1,fp8,fp8,0,0.07889600098133087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.07396266857783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.07804266611735027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,float16,0,0.07021333277225494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.06990399956703186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,64,128,1,fp8,fp8,0,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.06982933481534322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.06492266555627187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.06596800188223521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,float16,0,0.07045333087444305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,fp8,0,0.07120533287525177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,64,128,1,fp8,fp8,0,0.06663466493288676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.07007466753323872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,float16,0,0.07177599767843883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.07008000214894612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,fp8,0,0.07230933507283528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,64,128,1,fp8,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.06945600112279256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.06610133250554402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,float16,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,fp8,0,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,64,128,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.04628799855709076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.04418666660785675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,float16,0,0.044821331898371376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,fp8,0,0.045168002446492515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,64,128,1,fp8,fp8,0,0.042090664307276406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,64,128,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,float16,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.04526400069395701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.04527466495831808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,float16,0,0.04491200049718221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.04369066655635834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,64,128,1,fp8,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.04248000184694926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,float16,0,0.0322026660044988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,64,128,1,fp8,fp8,0,0.03298133363326391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.032357332607110344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,float16,0,0.0330079992612203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,64,128,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,64,128,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,float16,0,0.03294933338960012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,fp8,0,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,float16,0,0.03200533241033554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.031888000667095184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,fp8,0,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,64,128,1,fp8,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,64,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,float16,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,fp8,0,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,64,128,1,fp8,fp8,0,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.022885332504908245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,fp8,0,0.022197333474953968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,64,128,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,float16,0,0.020586666961510975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,float16,0,0.02046400060256322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,float16,0,0.4738773504892985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,0.4734400113423665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,fp8,0,0.4720799922943115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,64,128,1,fp8,fp8,0,0.45196266969045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,0.47253866990407306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,0.47920533021291095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,0.4508746862411499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,float16,0,0.4773333470026652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,fp8,0,0.4766240119934082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,64,128,1,fp8,fp8,0,0.4591626723607381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,0.476911981900533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,0.4589066505432129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,float16,0,0.4819253285725911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,0.48180798689524335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,fp8,0,0.4810826778411865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,float16,0,0.2664480010668437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,64,128,1,fp8,fp8,0,0.46564801534016925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,0.47864000002543133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,fp8,0,0.2625173330307007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,0.4650346835454305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,0.2666026751200358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,0.26576000452041626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,64,128,1,fp8,fp8,0,0.26664533217748004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,0.26428266366322833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,fp8,0,0.24256000916163126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,float16,0,0.24457067251205444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.2429386576016744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,64,128,1,fp8,fp8,0,0.2332906723022461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.24397865931193033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.2339199980099996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.2458826700846354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,float16,0,0.24623999993006387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.24523733059565225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,fp8,0,0.24592532714207968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,64,128,1,fp8,fp8,0,0.23987199862798056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.2391306757926941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,float16,0,0.2487199902534485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.248416006565094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,fp8,0,0.2485333283742269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,64,128,1,fp8,fp8,0,0.23993066946665445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.24869867165883383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.23971199989318848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,float16,0,0.1439253290494283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.14389866590499878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,fp8,0,0.14301333824793497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,64,128,1,fp8,fp8,0,0.14383467038472494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.14215466380119324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.1442080040772756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,float16,0,0.13174399733543396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.12989333271980286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.13144000371297201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,fp8,0,0.12947733203570047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,64,128,1,fp8,fp8,0,0.12366933623949687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.13198399543762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.1239520013332367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,float16,0,0.13168533643086752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,fp8,0,0.132533331712087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,64,128,1,fp8,fp8,0,0.1255573332309723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.13195199767748514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.1253493328889211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.12777066230773926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,float16,0,0.13346667091051737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.13221866885821024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,fp8,0,0.13243200381596884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,64,128,1,fp8,fp8,0,0.1276479959487915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.07638933261235555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.13357866803805032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.07627200086911519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,fp8,0,0.07701866825421651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,64,128,1,fp8,fp8,0,0.06814933319886525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,64,128,1,fp8,fp8,0,0.08040533463160197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.07894933223724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,float16,0,0.07042666773001353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.07052800059318542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,fp8,0,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.06765866776307423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,float16,0,0.07212266822655995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.07153066496054332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,64,128,1,fp8,fp8,0,0.06738666693369548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.0720960001150767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.07272000114123027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.06825600067774455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,float16,0,0.07211199899514516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,fp8,0,0.07277866701285045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,64,128,1,fp8,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.06799999872843425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,float16,0,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,float16,0,0.04758933186531067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.04683733483155569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,fp8,0,0.04689066608746847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,64,128,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.04655466477076212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.04500266909599304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,64,128,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.045509333411852516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,float16,0,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.044906665881474815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,64,128,1,fp8,fp8,0,0.0420959989229838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,float16,0,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,64,128,1,fp8,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.0424586683511734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,float16,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,fp8,0,0.03258133431275686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,64,128,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.03017599880695343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,float16,0,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,64,128,1,fp8,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.030218665798505146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,fp8,0,0.03046933313210805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,64,128,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.02976000060637792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,float16,0,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,64,128,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,fp8,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,64,128,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,float16,0,0.022885332504908245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,64,128,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,64,128,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,float16,0,0.3410079876581828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.34039998054504395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,fp8,0,0.3391520182291667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.3182026743888855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,64,128,1,fp8,fp8,0,0.3181973298390706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.33894399801890057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,float16,0,0.3426560163497925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.3439466555913289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,fp8,0,0.34190932909647626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,64,128,1,fp8,fp8,0,0.32502933343251544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.3421866496404012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.325381338596344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,float16,0,0.34452799956003827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.34548266728719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,fp8,0,0.3439573446909587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,64,128,1,fp8,fp8,0,0.3261813322703044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.34272531668345135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.326474666595459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,float16,0,0.19151999553044638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.19055465857187906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,fp8,0,0.18903466065724692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,64,128,1,fp8,fp8,0,0.18619734048843384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.18917866547902426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.18714133898417154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.1786186695098877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.1667626698811849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,float16,0,0.1768746574719747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.17898666858673096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.17765865723292032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,fp8,0,0.17851199706395468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,64,128,1,fp8,fp8,0,0.16675732533137003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,float16,0,0.17929067214330038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,fp8,0,0.1788533329963684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,64,128,1,fp8,fp8,0,0.16900799671808878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,fp8,0,0.18025066455205283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.1786186695098877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.16842132806777954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,float16,0,0.18077866236368814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.1792800029118856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,64,128,1,fp8,fp8,0,0.17043733596801758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.180293341477712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,float16,0,0.10254399975140889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.17268266280492148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.10082667072614034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,fp8,0,0.10082667072614034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,64,128,1,fp8,fp8,0,0.10230933626492818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.10107200344403584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.10081066687901814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,float16,0,0.09669867157936096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.09642133116722107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,fp8,0,0.09681600332260132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,64,128,1,fp8,fp8,0,0.09034666419029236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.08876799543698628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,float16,0,0.09700266520182292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.09660266836484273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,fp8,0,0.09672000010808308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.09611733754475911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,64,128,1,fp8,fp8,0,0.08983467022577922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.09582933783531189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.09072533249855042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,float16,0,0.09582400321960449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,fp8,0,0.09674666325251262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,64,128,1,fp8,fp8,0,0.09077866872151692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.09700266520182292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.09090666969617207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,float16,0,0.05638933181762695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.055770665407180786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,64,128,1,fp8,fp8,0,0.055013333757718406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.05659199754397074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,float16,0,0.05477866530418396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.05573866764704386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,fp8,0,0.0544106662273407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,64,128,1,fp8,fp8,0,0.05217066903909048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,float16,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,64,128,1,fp8,fp8,0,0.051962668697039284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.05473066866397858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.05530133346716563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.05230933427810669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,float16,0,0.055205335219701133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,64,128,1,fp8,fp8,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.05223466455936432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,float16,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.03620799879233042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,fp8,0,0.03870933254559835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,64,128,1,fp8,fp8,0,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.036362667878468834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,float16,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,64,128,1,fp8,fp8,0,0.035989334185918175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.03365333378314972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,fp8,0,0.03600533306598663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,64,128,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,float16,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.035989334185918175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,64,128,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.035029334326585136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,fp8,0,0.026416001220544178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,float16,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,float16,0,0.02610666553179423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,64,128,1,fp8,fp8,0,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.024826665719350178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,fp8,0,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,64,128,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,64,128,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,float16,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,float16,0,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,64,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,float16,0,0.27663999795913696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.27508799235026044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,fp8,0,0.27639466524124146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,64,128,1,fp8,fp8,0,0.2549546758333842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.27636265754699707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.27727999289830524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.255130668481191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,float16,0,0.27849066257476807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,fp8,0,0.27559467156728107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,64,128,1,fp8,fp8,0,0.25692800680796307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.27581334114074707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.2560799916585286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,float16,0,0.27763734261194867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.2781706651051839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,fp8,0,0.27852799495061237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,64,128,1,fp8,fp8,0,0.2582613428433736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.2773600021998088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,float16,0,0.149018665154775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,64,128,1,fp8,fp8,0,0.14586666226387024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.25867732365926105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.14871999621391296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,fp8,0,0.14787200093269348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.14787200093269348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.1448853313922882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,float16,0,0.143994669119517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.1442400018374125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,fp8,0,0.14389866590499878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,64,128,1,fp8,fp8,0,0.13310399651527405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.14289599657058716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.1323199967543284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,float16,0,0.1432319978872935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.14409066239992777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,fp8,0,0.1439573367436727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,64,128,1,fp8,fp8,0,0.13351466258366904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.14402133226394653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.13221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,float16,0,0.14421866337458292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.14388799667358398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,fp8,0,0.14385599891344705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,64,128,1,fp8,fp8,0,0.13421866297721863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.14383999506632486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.13424533605575562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,float16,0,0.08104533453782399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.08243733147780101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,fp8,0,0.08178666730721791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,64,128,1,fp8,fp8,0,0.0776693324247996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.07854933540026347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,float16,0,0.0798773318529129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.08036800225575765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,fp8,0,0.07914133369922638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,fp8,0,0.08015466729799907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,64,128,1,fp8,fp8,0,0.07568533221880595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.08040000001589458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.07530666887760162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,float16,0,0.08032000064849854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.08044800162315369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,fp8,0,0.08063999811808269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,64,128,1,fp8,fp8,0,0.07489066819349925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.07906133433183034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.0763733337322871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,float16,0,0.08055999875068665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.0804319977760315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,64,128,1,fp8,fp8,0,0.07611200213432312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.07973866661389668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.07632533212502797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,float16,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,64,128,1,fp8,fp8,0,0.04721599817276001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.048783997694651283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,float16,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.0460746685663859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,fp8,0,0.04725866516431173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,64,128,1,fp8,fp8,0,0.044735997915267944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,float16,0,0.046015997727712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.04696000119050344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,fp8,0,0.046495998899141945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,64,128,1,fp8,fp8,0,0.04461333155632019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,float16,0,0.046816001335779824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.047184000412623085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,fp8,0,0.04664533336957296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,64,128,1,fp8,fp8,0,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.044437333941459656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,float16,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,64,128,1,fp8,fp8,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,64,128,1,fp8,fp8,0,0.031152000029881794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.03025600065787633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,64,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,float16,0,0.022656001150608063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,64,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,64,128,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,64,128,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,float16,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,fp8,0,0.022261333962281544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,float16,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,64,128,1,fp8,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,float16,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,fp8,0,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,64,128,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,float16,0,0.24041599035263062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.24036800861358643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,fp8,0,0.24021865924199423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,64,128,1,fp8,fp8,0,0.2221013307571411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.24045334259668985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.22191466887791952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,float16,0,0.24048533042271933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.24036266406377158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.24014933904012045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,fp8,0,0.24015466372172037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,64,128,1,fp8,fp8,0,0.2227840026219686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.22190932432810465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,float16,0,0.2404693365097046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.24081599712371826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,fp8,0,0.24014399449030557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,64,128,1,fp8,fp8,0,0.22311999400456747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.2404586672782898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,float16,0,0.12794666488965353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.22337067127227783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.1279146671295166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,fp8,0,0.1277653376261393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,64,128,1,fp8,fp8,0,0.12121599912643433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.1274720033009847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.1216266651948293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,float16,0,0.12642666697502136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.12574399511019388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,fp8,0,0.12615467111269632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,64,128,1,fp8,fp8,0,0.11959999799728394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.12581866979599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.11978666981061299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.11965866883595784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,float16,0,0.12688533465067545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,float16,0,0.1267413298288981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.12735999623934427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,fp8,0,0.12713066736857095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,64,128,1,fp8,fp8,0,0.1197813351949056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.12575999895731607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.12756266196568808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,fp8,0,0.12743467092514038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,fp8,0,0.0735999991496404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,64,128,1,fp8,fp8,0,0.11965866883595784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.1275200049082438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.11962667107582092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,float16,0,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.07260799904664357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,64,128,1,fp8,fp8,0,0.06849066913127899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,float16,0,0.07057066758473714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.0721013347307841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,fp8,0,0.07238399982452393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,64,128,1,fp8,fp8,0,0.06836266815662384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.07029333213965099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.06681600213050842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,float16,0,0.07036266724268596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.07106666763623555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,fp8,0,0.07042666773001353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,64,128,1,fp8,fp8,0,0.0673173318306605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.0718560020128886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.06669866542021434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.06755733489990234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,float16,0,0.07088533540566762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.041989331444104515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,fp8,0,0.07087466617425282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,64,128,1,fp8,fp8,0,0.06771199901898702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.07052800059318542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,float16,0,0.04205333193143209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,fp8,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,64,128,1,fp8,fp8,0,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.04324266811211904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,64,128,1,fp8,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.04266666869322459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,float16,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.04260266820589701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,fp8,0,0.043418665726979576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,64,128,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,64,128,1,fp8,fp8,0,0.040864000717798867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,float16,0,0.042917331059773765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,64,128,1,fp8,fp8,0,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,float16,0,0.029359998802344005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,float16,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.02905600021282832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,64,128,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,64,128,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,float16,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,64,128,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.021925332645575207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,64,128,1,fp8,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,fp8,0,0.01834133391578992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,64,128,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,64,128,1,fp8,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,64,128,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,float16,0,1.839029312133789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,fp8,0,1.8540639877319336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,64,128,1,fp8,fp8,0,1.709328015645345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,float16,0,1.867461363474528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,fp8,0,1.8790559768676758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,64,128,1,fp8,fp8,0,1.7360906600952148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,float16,0,1.8812479972839355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,fp8,0,1.8975359598795574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,10.559797286987305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,11.394208272298178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,64,128,1,fp8,fp8,0,1.7608052889506023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,11.42905044555664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,11.439652760823568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,10.591072082519531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,11.432212829589844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,float16,0,1.9174559911092122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,fp8,0,1.9344479242960613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,64,128,1,fp8,fp8,0,1.7999626795450847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,11.451786041259766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,float16,0,1.0875999927520752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,fp8,0,1.1108160018920898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,64,128,1,fp8,fp8,0,1.049605369567871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,10.603466669718424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,5.9727732340494795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,11.460938771565756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,11.52017084757487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,float16,0,0.9643946488698324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,5.53382937113444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,5.992645263671875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,fp8,0,0.9729653199513754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,10.663861592610678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,64,128,1,fp8,fp8,0,0.9002453486124674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,float16,0,0.9698506991068522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,11.513924916585287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,fp8,0,0.9780053297678629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,64,128,1,fp8,fp8,0,0.9055679639180502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,float16,0,0.9764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,5.805509567260742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,fp8,0,0.9853599866231283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,5.380069096883138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,64,128,1,fp8,fp8,0,0.9153386751810709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,5.837125142415364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,5.8023681640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,float16,0,0.9922080039978027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,5.376757303873698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,5.820314407348633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,fp8,0,1.0030826727549236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,5.831013361612956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,64,128,1,fp8,fp8,0,0.933738629023234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,float16,0,0.5971413453420004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,fp8,0,0.6109013160069784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,64,128,1,fp8,fp8,0,0.5825333197911581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,5.397631963094075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,5.834890365600586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,5.839818954467773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,3.1008532842000327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,float16,0,0.5400373140970866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,fp8,0,0.5432106653849283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,64,128,1,fp8,fp8,0,0.5088853438695272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,3.1127732594807944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,5.410602569580078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,5.850400288899739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,2.87390931447347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,float16,0,0.5420960187911987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,fp8,0,0.5455413262049357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,64,128,1,fp8,fp8,0,0.5124213298161825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,3.021146774291992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,3.0224107106526694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,float16,0,0.545136014620463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,2.7985334396362305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,3.0198240280151367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,fp8,0,0.5495466788609823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,64,128,1,fp8,fp8,0,0.5158986647923788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,3.022085189819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,2.802266756693522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,float16,0,0.5533653497695923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,3.028874715169271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,fp8,0,0.5581333239873251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,64,128,1,fp8,fp8,0,0.524394671122233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,3.030453364054362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,float16,0,0.3959786494572957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,2.809274673461914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,3.0316588083902993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,fp8,0,0.3940266768137614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,64,128,1,fp8,fp8,0,0.3751893440882365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,1.7100799878438313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,3.047936121622721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,float16,0,0.3957546552022298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,2.8213065465291343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,fp8,0,0.395637313524882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,64,128,1,fp8,fp8,0,0.37142399946848553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,1.7071092923482258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,1.5803573926289876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,float16,0,0.39450132846832275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,1.6901334126790364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,fp8,0,0.39983999729156494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,64,128,1,fp8,fp8,0,0.3734026749928792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,1.6962879498799641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,1.5739572842915852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,float16,0,0.39428265889485675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,1.6948693593343098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,fp8,0,0.39962132771809894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,64,128,1,fp8,fp8,0,0.37380798657735187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,1.5799360275268555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,1.6981439590454102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,float16,0,0.3961919943491618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,1.702170689900716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,fp8,0,0.39498666922251385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,64,128,1,fp8,fp8,0,0.3755199909210205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,1.6936853726704915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,1.5754613876342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,1.7074507077534993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,1.701680024464925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,float16,0,1.3742772738138835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,1.5750667254130046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,64,128,1,fp8,fp8,0,1.271498680114746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,fp8,0,1.387056032816569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,float16,0,1.3840053876241047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,fp8,0,1.3930400212605794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,64,128,1,fp8,fp8,0,1.2881226539611816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,float16,0,1.394426663716634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,6.724720001220703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,6.733807881673177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,6.227418899536133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,fp8,0,1.4068320592244465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,64,128,1,fp8,fp8,0,1.3057546615600586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,6.730960210164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,6.750933329264323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,float16,0,1.4175146420796711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,6.252768198649089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,fp8,0,1.432213306427002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,6.741578420003255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,64,128,1,fp8,fp8,0,1.3367466926574707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,float16,0,0.8156267007191976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,fp8,0,0.836079994837443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,64,128,1,fp8,fp8,0,0.7887252966562907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,6.2620588938395185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,6.765232086181641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,3.5559094746907554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,float16,0,0.7261973222096761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,6.79368527730306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,fp8,0,0.731216033299764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,64,128,1,fp8,fp8,0,0.6786080201466879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,3.5822134017944336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,6.302581151326497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,3.3058719635009766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,6.797146479288737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,float16,0,0.7288320064544678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,fp8,0,0.7363306681315104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,64,128,1,fp8,fp8,0,0.6823306878407797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,3.4696426391601562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,float16,0,0.7361120382944742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,fp8,0,0.7420053482055664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,3.449909210205078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,3.1912320454915366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,64,128,1,fp8,fp8,0,0.6906826496124268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,3.450624148050944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,3.451509475708008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,3.2009013493855796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,float16,0,0.7462186813354492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,fp8,0,0.7569386959075928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,3.4527839024861655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,64,128,1,fp8,fp8,0,0.7026879787445068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,float16,0,0.4516640106836955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,fp8,0,0.46250665187835693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,3.2089172999064126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,3.460277239481608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,64,128,1,fp8,fp8,0,0.44149335225423175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,3.4787041346232095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,1.8657600084940593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,float16,0,0.4079200029373169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,3.2177600860595703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,3.4825334548950195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,fp8,0,0.4100480079650879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,1.8798559506734211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,1.7401599884033203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,64,128,1,fp8,fp8,0,0.38571735223134357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,float16,0,0.4089599847793579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,1.8108320236206055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,fp8,0,0.4123733441034953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,64,128,1,fp8,fp8,0,0.38786665598551434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,1.8094666798909504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,1.684618631998698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,float16,0,0.41272000471750897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,1.8127999305725098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,fp8,0,0.415829340616862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,64,128,1,fp8,fp8,0,0.39214932918548584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,1.8150399525960286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,1.6909066836039226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,1.812986691792806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,float16,0,0.4185333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,fp8,0,0.42303466796875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,64,128,1,fp8,fp8,0,0.3998986482620239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,1.6921653747558594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,1.818079948425293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,float16,0,0.30355199178059894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,fp8,0,0.3036213318506877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,1.825178623199463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,64,128,1,fp8,fp8,0,0.2871359984079997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,1.058245340983073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,1.6955893834431965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,float16,0,0.30190932750701904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,1.8340373039245605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,64,128,1,fp8,fp8,0,0.2837226589520772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,1.05131729443868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,fp8,0,0.30185600121816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,0.9747786521911621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,1.047648032506307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,float16,0,0.30296534299850464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,fp8,0,0.3016586701075236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,1.0444479783376057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,0.976469357808431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,64,128,1,fp8,fp8,0,0.28621333837509155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,1.0475146770477295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,float16,0,0.30186667044957477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,1.0446826616923015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,fp8,0,0.30318933725357056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,0.9762826760609945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,64,128,1,fp8,fp8,0,0.2871680061022441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,1.0456960201263428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,float16,0,0.30273600419362384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,1.0502560138702393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,fp8,0,0.3038453261057536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,0.9775733153025309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,64,128,1,fp8,fp8,0,0.2855839927991231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,1.047397295633952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,1.0520959695180256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,0.9750666618347168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,float16,0,1.1452960173288982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,fp8,0,1.1541546980539958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,64,128,1,fp8,fp8,0,1.059173345565796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,float16,0,1.1512586275736492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,fp8,0,1.1612799962361653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,64,128,1,fp8,fp8,0,1.0708800156911213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,float16,0,1.1595679918924968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,4.849439938863118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,4.47925345102946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,4.850730578104655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,fp8,0,1.1707093715667725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,4.849866549173991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,64,128,1,fp8,fp8,0,1.0816906293233235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,4.856245358784993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,4.503328005472819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,float16,0,1.1792960166931152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,fp8,0,1.191327969233195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,4.857162793477376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,64,128,1,fp8,fp8,0,1.1078399817148845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,float16,0,0.6827519734700521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,fp8,0,0.69706130027771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,4.508069356282552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,64,128,1,fp8,fp8,0,0.6601279973983765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,4.866410573323567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,2.583338737487793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,4.903903961181641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,float16,0,0.6063253482182821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,4.91047477722168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,fp8,0,0.6128799915313721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,4.540106773376465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,64,128,1,fp8,fp8,0,0.5677599906921387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,2.602000077565511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,2.40720001856486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,2.4870293935139975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,float16,0,0.6105813185373942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,fp8,0,0.6151359875996908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,64,128,1,fp8,fp8,0,0.5718293190002441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,2.4986133575439453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,float16,0,0.6138986746470133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,2.317509333292643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,fp8,0,0.6213066577911377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,2.4990933736165366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,64,128,1,fp8,fp8,0,0.5776426792144775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,2.3148372968037925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,2.502682685852051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,float16,0,0.6248693466186523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,2.500282605489095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,fp8,0,0.6317333380381266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,64,128,1,fp8,fp8,0,0.5896533330281576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,2.512714703877767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,float16,0,0.38140801588694256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,2.3267787297566733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,fp8,0,0.38953598340352374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,64,128,1,fp8,fp8,0,0.3723946809768677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,2.5118187268575034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,1.3675999641418457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,float16,0,0.341322660446167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,2.3332907358805337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,2.525760014851888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,fp8,0,0.3444853226343791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,1.3780585924784343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,1.280191977818807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,1.3174293041229248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,64,128,1,fp8,fp8,0,0.3243839939435323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,float16,0,0.34385065237681073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,fp8,0,0.3468746741612752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,64,128,1,fp8,fp8,0,0.3285599946975708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,1.3202719688415527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,1.2327679793039958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,float16,0,0.3487146695454915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,fp8,0,0.35157867272694904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,1.318399985631307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,64,128,1,fp8,fp8,0,0.33022934198379517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,1.3232426643371582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,1.2314026355743408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,float16,0,0.35266133149464923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,1.3257386684417725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,fp8,0,0.3567359844843547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,1.331269343694051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,64,128,1,fp8,fp8,0,0.3364746570587158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,1.2378079891204834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,float16,0,0.25414933760960895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,1.3311839898427327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,64,128,1,fp8,fp8,0,0.2421600023905436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,fp8,0,0.25438400109608966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,1.336085319519043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,0.7880799770355225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,1.2443093458811443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,float16,0,0.2526506582895915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,64,128,1,fp8,fp8,0,0.23852266867955527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,0.7767573197682699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,0.7865386803944906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,0.7302026748657227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,fp8,0,0.25145600239435834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,float16,0,0.2510559956232707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,0.7827306588490804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,0.7285333474477133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,fp8,0,0.25252799193064374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,float16,0,0.25217066208521527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,64,128,1,fp8,fp8,0,0.23825599749883017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,0.7767679691314697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,0.7829386393229166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,0.7275520165761312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,fp8,0,0.25377599398295086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,float16,0,0.2525866627693176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,64,128,1,fp8,fp8,0,0.240282674630483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,0.7833173274993896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,64,128,1,fp8,fp8,0,0.24081067244211832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,0.7790506680806478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,0.7312106291453043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,fp8,0,0.25412267446517944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,0.7848853270212809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,0.7798399925231934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,0.7264320055643717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,float16,0,1.7901066144307454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,fp8,0,1.8018293380737305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,64,128,1,fp8,fp8,0,1.6581494013468425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,float16,0,1.8149545987447102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,fp8,0,1.829301357269287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,64,128,1,fp8,fp8,0,1.6861066818237305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,6.432863871256511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,float16,0,1.8317119280497234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,5.958874384562175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,6.443391799926758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,6.449514389038086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,fp8,0,1.8465654055277507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,64,128,1,fp8,fp8,0,1.7073440551757812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,5.9747358957926435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,6.471162796020508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,float16,0,1.8655840555826824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,6.4778397878011065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,fp8,0,1.8811839421590169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,64,128,1,fp8,fp8,0,1.749776045481364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,float16,0,1.0409706433614094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,fp8,0,1.0629279613494873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,6.012874603271484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,6.500021616617839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,64,128,1,fp8,fp8,0,1.0007092952728271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,3.418224016825358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,6.534832000732422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,float16,0,0.9149119853973389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,6.547711690266927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,6.046202977498372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,3.434490521748861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,fp8,0,0.9236106872558594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,64,128,1,fp8,fp8,0,0.8494773705800375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,3.18065611521403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,float16,0,0.9202933311462402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,fp8,0,0.9280426502227783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,3.2616427739461265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,64,128,1,fp8,fp8,0,0.8570666313171387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,3.022181193033854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,3.271333376566569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,float16,0,0.9276639620463053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,3.2725280125935874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,fp8,0,0.9384640057881674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,64,128,1,fp8,fp8,0,0.8666559855143229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,3.2795893351236978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,3.029930750528971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,float16,0,0.9448266824086508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,fp8,0,0.9551200071970621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,3.281599998474121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,64,128,1,fp8,fp8,0,0.8845280011494955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,float16,0,0.5483253399531046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,3.0392319361368814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,3.2927894592285156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,fp8,0,0.5612586736679077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,3.3063360850016275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,64,128,1,fp8,fp8,0,0.5313119888305664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,1.7674560546875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,float16,0,0.48817598819732666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,fp8,0,0.4924693504969279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,64,128,1,fp8,fp8,0,0.4588693380355835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,3.313173294067383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,3.066394805908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,1.7769546508789062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,1.647999922434489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,1.6919253667195637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,float16,0,0.49004268646240234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,fp8,0,0.49584531784057617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,1.6911199887593586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,64,128,1,fp8,fp8,0,0.4604853391647339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,1.5704800287882488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,float16,0,0.49395732084910077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,1.6916853586832683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,fp8,0,0.4984159866968791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,64,128,1,fp8,fp8,0,0.46609067916870117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,1.6980692545572917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,1.5796373685201008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,float16,0,0.5017973184585571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,1.702389399210612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,fp8,0,0.5087573528289795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,64,128,1,fp8,fp8,0,0.47387198607126874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,1.7036426862080891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,1.5812533696492512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,float16,0,0.305402676264445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,fp8,0,0.31386667490005493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,1.7093547185262044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,64,128,1,fp8,fp8,0,0.2998826702435811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,0.9450186888376871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,float16,0,0.2714826663335164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,1.719146728515625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,1.5874826113382976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,0.952176014582316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,fp8,0,0.2732693354288737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,0.8868746757507324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,float16,0,0.27316800753275555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,64,128,1,fp8,fp8,0,0.26065067450205487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,0.8473707040150961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,0.9018826484680176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,0.9050133228302002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,fp8,0,0.27591466903686523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,64,128,1,fp8,fp8,0,0.2627200086911519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,0.9056053161621094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,0.9107413291931152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,float16,0,0.2770079970359802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,0.9056746959686279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,0.8468320369720459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,fp8,0,0.2785653273264567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,64,128,1,fp8,fp8,0,0.26549333333969116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,0.9158346652984619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,float16,0,0.283242662747701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,0.8517866929372152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,fp8,0,0.28648533423741657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,64,128,1,fp8,fp8,0,0.2705120046933492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,0.9215146700541178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,0.553765336672465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,0.9183093706766764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,float16,0,0.20546666781107584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,fp8,0,0.20723734299341837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,0.8554879824320475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,64,128,1,fp8,fp8,0,0.1954080065091451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,0.5577760140101115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,float16,0,0.20137600104014078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,0.5147626797358195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,fp8,0,0.20124266544977823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,0.5479306777318319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,64,128,1,fp8,fp8,0,0.19510932763417563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,0.548794666926066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,float16,0,0.20335467656453451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,0.5127946535746256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,fp8,0,0.20142932732899985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,0.5480373303095499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,64,128,1,fp8,fp8,0,0.19330666462580362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,0.5510986646016439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,float16,0,0.20468266805013022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,0.5145386854807535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,fp8,0,0.20426666736602783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,0.5493119955062866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,64,128,1,fp8,fp8,0,0.19273066520690918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,0.5500799814860026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,float16,0,0.20525866746902466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,0.5168533325195312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,fp8,0,0.20535999536514282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,0.5520373185475668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,64,128,1,fp8,fp8,0,0.19318399826685587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,0.5507573286692301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,0.5155413150787354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,float16,0,1.3347466786702473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,fp8,0,1.3479359944661458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,64,128,1,fp8,fp8,0,1.2337120374043782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,float16,0,1.3459092775980632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,fp8,0,1.356064001719157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,64,128,1,fp8,fp8,0,1.2506346702575684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,3.8905439376831055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,3.5968214670817056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,3.8954505920410156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,float16,0,1.3563733100891113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,3.9016106923421225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,fp8,0,1.3681440353393555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,64,128,1,fp8,fp8,0,1.2647199630737305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,3.625962575276693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,3.9168640772501626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,float16,0,1.3826026916503906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,3.9147411982218423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,fp8,0,1.3952693939208984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,64,128,1,fp8,fp8,0,1.2984320322672527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,3.9269065856933594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,float16,0,0.7809706528981527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,3.636122703552246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,fp8,0,0.801093339920044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,3.9547786712646484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,64,128,1,fp8,fp8,0,0.7523840268452963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,2.0954880714416504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,float16,0,0.6892746289571127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,3.970063845316569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,fp8,0,0.6951999664306641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,3.6661974589029946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,64,128,1,fp8,fp8,0,0.6411413351694742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,2.1138134002685547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,1.9583093325297039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,1.9894986152648926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,float16,0,0.6934613386789957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,fp8,0,0.7006933689117432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,64,128,1,fp8,fp8,0,0.6463626623153687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,1.9976800282796223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,1.8489813804626465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,float16,0,0.6992106437683105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,1.9946613311767578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,fp8,0,0.7073173522949219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,64,128,1,fp8,fp8,0,0.6554133494695028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,2.001493295033773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,1.8524106343587239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,float16,0,0.7108533382415771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,1.9995360374450684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,fp8,0,0.7196213404337565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,64,128,1,fp8,fp8,0,0.6680320103963217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,2.0115466117858887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,1.8569226264953613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,float16,0,0.41283734639485675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,fp8,0,0.42446935176849365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,2.0157972971598306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,64,128,1,fp8,fp8,0,0.4020906686782837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,1.0958080291748047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,2.0272480646769204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,1.103274663289388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,1.8699146906534831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,float16,0,0.36710933844248456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,1.024282693862915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,fp8,0,0.3693600098292033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,64,128,1,fp8,fp8,0,0.3470240036646525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,1.0378666718800862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,float16,0,0.3691306511561076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,1.0433173179626465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,0.9696106910705566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,1.0422186851501465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,fp8,0,0.3723520040512085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,64,128,1,fp8,fp8,0,0.3507839838663737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,1.046277364095052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,float16,0,0.37324798107147217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,0.9728906949361166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,fp8,0,0.3761599858601888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,64,128,1,fp8,fp8,0,0.35359466075897217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,1.0482827027638753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,float16,0,0.3806026776631673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,0.9777119954427084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,fp8,0,0.3838026523590088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,1.0488053162892659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,64,128,1,fp8,fp8,0,0.36002667744954425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,1.0562187035878499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,float16,0,0.23628799120585123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,1.0620266596476238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,fp8,0,0.2421919902165731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,0.9842080275217692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,0.5948319832483927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,64,128,1,fp8,fp8,0,0.22981866200764975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,0.602837324142456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,float16,0,0.20758932828903198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,0.5618186791737875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,fp8,0,0.20934400955835977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,0.5627839962641398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,64,128,1,fp8,fp8,0,0.20027732849121094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,0.5631946722666422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,fp8,0,0.21067200104395548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,float16,0,0.2088373303413391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,0.530789335568746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,0.5651520093282064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,64,128,1,fp8,fp8,0,0.202400008837382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,float16,0,0.21000534296035767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,0.5660426616668701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,0.5312373240788778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,fp8,0,0.21377599239349365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,0.5680426756540934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,64,128,1,fp8,fp8,0,0.20542933543523154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,0.5698506832122803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,float16,0,0.21625065803527832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,0.5338773330052694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,fp8,0,0.21894399325052896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,0.5740106503168741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,64,128,1,fp8,fp8,0,0.20928533871968588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,float16,0,0.16054933269818625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,0.5397760073343912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,0.5774720112482706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,0.3617066542307536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,fp8,0,0.16207999984423319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,64,128,1,fp8,fp8,0,0.15410666664441428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,0.3614399830500285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,float16,0,0.1566933294137319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,0.338373343149821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,fp8,0,0.1585599978764852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,0.3569706678390503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,64,128,1,fp8,fp8,0,0.15003732840220133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,0.3588266770044963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,float16,0,0.15824533502260843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,0.3346133232116699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,fp8,0,0.16019200285275778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,0.3555893500645955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,64,128,1,fp8,fp8,0,0.15065067013104758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,0.35749868551890057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,0.3347413142522176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,float16,0,0.16012266278266907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,0.35573867956797284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,fp8,0,0.16018666823705038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,64,128,1,fp8,fp8,0,0.15218666195869446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,0.3566186825434367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,64,128,1,fp8,fp8,0,0.15009599924087524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,0.33537065982818604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,float16,0,0.15779200196266174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,fp8,0,0.160671999057134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,0.3590773344039917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,0.3594079812367757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,0.3372960090637207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,float16,0,1.7643359502156575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,fp8,0,1.7762239774068196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,64,128,1,fp8,fp8,0,1.6317280133565266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,float16,0,1.788981278737386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,fp8,0,1.8012852668762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,3.899600028991699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,64,128,1,fp8,fp8,0,1.6613012949625652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,3.6189705530802407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,3.919072151184082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,3.934581438700358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,float16,0,1.8044053713480632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,3.949808120727539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,64,128,1,fp8,fp8,0,1.6818772951761882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,3.6518987019856772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,fp8,0,1.819445292154948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,float16,0,1.840943972269694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,3.9459412892659507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,3.965749422709147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,fp8,0,1.856064001719157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,3.6726719538370767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,64,128,1,fp8,fp8,0,1.7230559984842937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,float16,0,1.0160319805145264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,fp8,0,1.0355093479156494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,64,128,1,fp8,fp8,0,0.9738826751708984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,4.00111452738444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,2.1184585889180503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,4.0172373453776045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,float16,0,0.8889173666636149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,2.1366559664408364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,fp8,0,0.8982079823811849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,1.983733336130778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,3.709749221801758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,64,128,1,fp8,fp8,0,0.8242293198903402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,1.9747519493103027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,float16,0,0.8965919812520345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,fp8,0,0.9026292959849039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,64,128,1,fp8,fp8,0,0.8313546975453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,1.983232021331787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,1.8301386833190918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,1.9836692810058594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,float16,0,0.9018399715423584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,1.991696039835612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,fp8,0,0.9123093287150065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,64,128,1,fp8,fp8,0,0.8396693070729574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,1.8401333491007488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,1.9866773287455242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,float16,0,0.9186933040618896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,2.001322587331136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,fp8,0,0.9291040102640787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,64,128,1,fp8,fp8,0,0.8597813447316488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,1.847306728363037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,float16,0,0.5272266864776611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,2.010570685068766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,fp8,0,0.5374240080515543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,64,128,1,fp8,fp8,0,0.5069760084152222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,1.0893226464589436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,2.016080061594645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,1.8674987157185872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,float16,0,0.4626346826553345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,1.1032426357269287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,1.0262293020884197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,fp8,0,0.4671413501103719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,64,128,1,fp8,fp8,0,0.43273599942525226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,1.017855962117513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,float16,0,0.4669333299001058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,fp8,0,0.47141865889231366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,1.0225173632303874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,0.9502666791280111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,64,128,1,fp8,fp8,0,0.4374613364537557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,1.0226506392161052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,float16,0,0.47068266073862713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,1.0247466564178467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,fp8,0,0.47520001729329425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,64,128,1,fp8,fp8,0,0.44096000989278156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,1.0303146839141846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,float16,0,0.47815465927124023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,0.9534826278686523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,0.9605866273244222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,1.0316480000813801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,fp8,0,0.4850986798604329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,64,128,1,fp8,fp8,0,0.44994131724039715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,1.0351786613464355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,float16,0,0.28037865956624347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,fp8,0,0.2873973250389099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,0.5785813331604004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,1.0449706713358562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,64,128,1,fp8,fp8,0,0.27427200476328534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,0.966912031173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,0.5858720143636068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,float16,0,0.244704008102417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,0.5447573264439901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,fp8,0,0.2477173407872518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,0.5379306475321451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,64,128,1,fp8,fp8,0,0.23621867100397745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,0.5414933363596598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,float16,0,0.2483146588007609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,0.5082186857859293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,0.5435839891433716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,fp8,0,0.24843200047810873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,0.5394773483276367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,64,128,1,fp8,fp8,0,0.23738133907318115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,0.5098079840342203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,float16,0,0.25142399470011395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,fp8,0,0.2546986738840739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,float16,0,0.2564479907353719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,0.546554684638977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,64,128,1,fp8,fp8,0,0.24010666211446127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,0.5470453500747681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,0.5117226839065552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,fp8,0,0.2591093381245931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,0.5517386595408121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,64,128,1,fp8,fp8,0,0.24648533264795938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,0.5548106829325358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,float16,0,0.1585599978764852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,0.5177919864654541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,float16,0,0.1383519967397054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,0.3232799967130025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,fp8,0,0.16237333416938782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,64,128,1,fp8,fp8,0,0.1581653356552124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,0.3266879916191101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,0.30900800228118896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.2999573349952698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,fp8,0,0.1397173305352529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,64,128,1,fp8,fp8,0,0.13329066832860312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.30101333061854046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,0.28148800134658813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.30323199431101483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,float16,0,0.13769066333770752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,fp8,0,0.1397599975268046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.3002026677131653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,64,128,1,fp8,fp8,0,0.13166399796803793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,0.2815893292427063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.30424533287684125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,float16,0,0.13991467157999674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,fp8,0,0.14177067081133524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.30186667044957477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,64,128,1,fp8,fp8,0,0.1356000006198883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,0.2855946620305379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,float16,0,0.14404267072677612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.3067199985186259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,fp8,0,0.14458133776982626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,64,128,1,fp8,fp8,0,0.14181333780288696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.30801600217819214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,64,128,1,fp8,fp8,0,0.10698666175206502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,0.29259200890858966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,float16,0,0.11104533076286316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.20545067389806113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,fp8,0,0.10944533348083496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,fp8,0,0.10972266395886739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.20521066586176553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.19230933984120688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,float16,0,0.10977066556612651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.20392000675201416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,64,128,1,fp8,fp8,0,0.1063253382841746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,fp8,0,0.11107732852300008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.202949325243632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.19222933053970337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,float16,0,0.10934933026631673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.20295466979344687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,64,128,1,fp8,fp8,0,0.10518399874369304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.20336000124613443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.20459733406702676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.1927893360455831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,float16,0,0.1093280017375946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,fp8,0,0.11114666859308879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,64,128,1,fp8,fp8,0,0.10523733496665955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.20383467276891074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.19134400288263956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,float16,0,0.10941867033640544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.20430932442347208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,fp8,0,0.10900266965230306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,64,128,1,fp8,fp8,0,0.10689600308736165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.20334400733311972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.19328000148137411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,float16,0,1.3152960141499836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,64,128,1,fp8,fp8,0,1.2139093081156414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,fp8,0,1.3225066661834717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,float16,0,1.324191967646281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,2.448570728302002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,2.465669314066569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,fp8,0,1.336751937866211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,2.275279998779297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,64,128,1,fp8,fp8,0,1.2315253416697185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,2.4671093622843423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,float16,0,1.336309274037679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,2.4727039337158203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,fp8,0,1.3476853370666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,64,128,1,fp8,fp8,0,1.2462186813354492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,2.291701316833496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,2.4822400410970054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,float16,0,1.362768014272054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,2.4911413192749023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,2.3107199668884277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,fp8,0,1.37555726369222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,64,128,1,fp8,fp8,0,1.275216023127238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,float16,0,0.7642026742299398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,2.506480058034261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,fp8,0,0.7794079780578613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,64,128,1,fp8,fp8,0,0.7326933542887369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,1.3560800552368164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,2.517146587371826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,1.3698132832845051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,2.3437013626098633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,float16,0,0.6682186921437582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,fp8,0,0.6753866672515869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,1.274458646774292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,64,128,1,fp8,fp8,0,0.6226239999135336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,1.249066670735677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,float16,0,0.6747840245564779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,1.2517013549804688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,1.1622133255004883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,64,128,1,fp8,fp8,0,0.6275999943415324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,fp8,0,0.6816480159759521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,1.2517813046773274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,float16,0,0.6824959913889567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,1.2613013585408528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,1.1694933573404949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,fp8,0,0.6886773109436035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,64,128,1,fp8,fp8,0,0.6340426603953043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,1.2595893541971843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,float16,0,0.6932960351308187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,1.2698132991790771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,1.1732373237609863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,fp8,0,0.7010506788889567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,64,128,1,fp8,fp8,0,0.6475306749343872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,1.2750720183054607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,float16,0,0.398911992708842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,fp8,0,0.40595734119415283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,64,128,1,fp8,fp8,0,0.3843626578648885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,0.7042826811472574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,1.1894079844156902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,1.2840960025787354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,0.7124640146891276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,64,128,1,fp8,fp8,0,0.33057600259780884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,0.6483680009841919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,float16,0,0.3490080038706462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,0.6649333238601685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,fp8,0,0.35276798407236737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,0.6516000032424927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,float16,0,0.3508799870808919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,0.6091573238372803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,fp8,0,0.35306668281555176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,0.650709350903829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,float16,0,0.3554133176803589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,64,128,1,fp8,fp8,0,0.3327893416086833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,0.6551253398259481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,0.6123040119806925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,fp8,0,0.35887467861175537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,0.6558186610539755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,64,128,1,fp8,fp8,0,0.33631467819213867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,0.661301334698995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,float16,0,0.3627786636352539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,0.616213321685791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,fp8,0,0.3670346736907959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,0.661840001742045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,64,128,1,fp8,fp8,0,0.34282131989796955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,float16,0,0.21690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,0.668170690536499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,0.6219093402226766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,0.37809598445892334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,fp8,0,0.2218666672706604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,64,128,1,fp8,fp8,0,0.2113920052846273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,0.3819626569747925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,0.3594346841176351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,float16,0,0.18685332934061685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,0.3449173370997111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,fp8,0,0.1891146699587504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,64,128,1,fp8,fp8,0,0.18091734250386557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,0.34694401423136395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,float16,0,0.18773865699768066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,0.35045866171518963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,0.32846933603286743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,0.34641067186991376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,fp8,0,0.18966933091481528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,64,128,1,fp8,fp8,0,0.18290134270985922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,0.33029866218566895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,float16,0,0.19105599323908487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,fp8,0,0.19325866301854452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,0.35028799374898273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,64,128,1,fp8,fp8,0,0.1857866644859314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,0.35125335057576496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,float16,0,0.19620800018310547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,0.35842665036519367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,float16,0,0.12504000465075174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,0.3383466800053914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,0.33269866307576496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,0.3548640012741089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,fp8,0,0.199237326780955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,64,128,1,fp8,fp8,0,0.19195733467737833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.20716800292332968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.21388266483942667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,fp8,0,0.12779200077056885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,64,128,1,fp8,fp8,0,0.12577600280443826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.21584532658259073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,float16,0,0.11053867141405742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.19949867328008017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,fp8,0,0.11100799838701884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,64,128,1,fp8,fp8,0,0.1050879955291748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,64,128,1,fp8,fp8,0,0.10486933588981628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.19914666811625162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.1861226757367452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.18718934059143066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,float16,0,0.11099732915560405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,float16,0,0.11105599999427795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.19776533047358194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,fp8,0,0.11213866869608562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.1994453271230062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.20058667659759521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,fp8,0,0.11239999532699585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,64,128,1,fp8,fp8,0,0.1069653332233429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,64,128,1,fp8,fp8,0,0.10902399818102519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.20113066832224527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.18784532944361368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,float16,0,0.11307199796040852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.14009066422780356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.20145599047342935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,fp8,0,0.11559999982515971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.14076266686121622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.20177066326141357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.19171200195948282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,float16,0,0.084714670976003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,64,128,1,fp8,fp8,0,0.08241066833337148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.14056533575057983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.13197333614031473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,float16,0,0.0867146650950114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.1420693298180898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.14082133769989014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,fp8,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,64,128,1,fp8,fp8,0,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.13291733463605246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,float16,0,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,fp8,0,0.0848426620165507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,64,128,1,fp8,fp8,0,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.14152000347773233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.141157329082489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.1318826675415039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,float16,0,0.08653333783149719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.14131200313568115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,fp8,0,0.08538666367530823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,64,128,1,fp8,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,64,128,1,fp8,fp8,0,0.08248533308506012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.13366933663686117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,float16,0,0.08618133266766866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.1399893363316854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,fp8,0,0.08642666538556416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.14146666725476584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.1325706640879313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,float16,0,1.7532960573832195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,fp8,0,1.7651252746582031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,64,128,1,fp8,fp8,0,1.6090879440307617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,2.6412693659464517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,float16,0,1.7681439717610676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,2.4395573933919272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,2.648944060007731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,fp8,0,1.7830559412638347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,64,128,1,fp8,fp8,0,1.626442591349284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,2.6554080645243325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,float16,0,1.7925705909729004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,2.459503968556722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,2.669013341267904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,fp8,0,1.8060107231140137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,64,128,1,fp8,fp8,0,1.6506719589233398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,2.6825440724690757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,2.4799466133117676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,2.686976114908854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,float16,0,1.8239359855651855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,fp8,0,1.8358346621195476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,64,128,1,fp8,fp8,0,1.6934186617533367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,2.716032028198242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,float16,0,1.0043573379516602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,fp8,0,1.0207306543986003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,64,128,1,fp8,fp8,0,0.9656000137329102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,1.4658560752868652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,2.5259626706441245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,2.7274773915608725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,float16,0,0.8744746843973795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,1.4842027028401692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,1.3880373636881511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,fp8,0,0.8843093713124593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,64,128,1,fp8,fp8,0,0.8095839818318685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,1.3238133589426677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,float16,0,0.885205348332723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,1.3314666748046875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,1.2344426314036052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,fp8,0,0.8920000394185384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,64,128,1,fp8,fp8,0,0.8200426896413168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,1.334378719329834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,1.3381279309590657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,1.2409119606018066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,float16,0,0.8917973041534424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,fp8,0,0.9000106652577718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,64,128,1,fp8,fp8,0,0.8286293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,1.3397173881530762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,1.3497333526611328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,1.2500426769256592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,float16,0,0.9067786534627279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,fp8,0,0.9163786570231119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,64,128,1,fp8,fp8,0,0.8478079636891683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,1.3554506301879883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,float16,0,0.5148586829503378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,1.3657066027323406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,1.2690773010253906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,0.7530879974365234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,fp8,0,0.525002678235372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,64,128,1,fp8,fp8,0,0.49462934335072833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,0.7628373305002848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,float16,0,0.4490079879760742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,0.7109866937001547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,0.677903970082601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,fp8,0,0.45366398493448895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,64,128,1,fp8,fp8,0,0.42048001289367676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,0.682101329167684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,0.6355946858723959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,float16,0,0.45446932315826416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,0.6845440069834391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,fp8,0,0.45739201704661053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,64,128,1,fp8,fp8,0,0.42446398735046387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,0.687824010848999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,fp8,0,0.46329065163930255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,float16,0,0.457856019337972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,0.6430293321609497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,0.6889653205871582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,64,128,1,fp8,fp8,0,0.43007465203603107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,0.6931573549906412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,0.645413319269816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,float16,0,0.4659359852472941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,fp8,0,0.4717013438542684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,0.6985653241475424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,64,128,1,fp8,fp8,0,0.43880001703898114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,0.7034560044606527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,float16,0,0.27077333132425946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,0.6540799935658773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,0.3755253156026204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,0.39453331629435223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,fp8,0,0.27666666110356647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,64,128,1,fp8,fp8,0,0.26263999938964844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,0.4021386702855428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,float16,0,0.23222400744756064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,0.3524693250656128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,fp8,0,0.2341973384221395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,64,128,1,fp8,fp8,0,0.22362667322158813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,0.35790399710337323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,0.33852267265319824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,float16,0,0.2362826665242513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,0.3572160005569458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,fp8,0,0.23655466238657633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,64,128,1,fp8,fp8,0,0.22580265998840332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,0.35955198605855304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,0.3408373196919759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,float16,0,0.23885866006215414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,0.36101865768432617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,fp8,0,0.24080000321070352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,64,128,1,fp8,fp8,0,0.22916799783706665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,0.36257068316141766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,0.34116800626118976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,float16,0,0.244869331518809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,0.3658986488978068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,fp8,0,0.2466986576716105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,64,128,1,fp8,fp8,0,0.23401067654291788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,float16,0,0.1479626695315043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,0.34885334968566895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,0.3715573151906331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.2153759996096293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,fp8,0,0.14998400211334229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,64,128,1,fp8,fp8,0,0.14560533563296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.21967999140421549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.2083359956741333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,float16,0,0.12549866239229837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.19370132684707642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,fp8,0,0.12714667121569315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,64,128,1,fp8,fp8,0,0.11955733100573222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.1939786672592163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.18254399299621582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,float16,0,0.1255573332309723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.1946559945742289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,fp8,0,0.12743467092514038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,64,128,1,fp8,fp8,0,0.12130666772524516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,64,128,1,fp8,fp8,0,0.12356799840927124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.19664533933003744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.19712533553441366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.18313600619633993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,float16,0,0.12781332929929098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.1957119901974996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,fp8,0,0.1283093293507894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,float16,0,0.13144000371297201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.18587199846903482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.19930134216944376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,fp8,0,0.1318719983100891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,64,128,1,fp8,fp8,0,0.12949867049853006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.2021920084953308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.1933013399442037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,float16,0,0.08664000034332275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.12523733576138815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.11763733625411987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,fp8,0,0.08712533116340637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,64,128,1,fp8,fp8,0,0.08724266290664673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.1273973286151886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.12331199645996094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,float16,0,0.07824533184369405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,fp8,0,0.08040000001589458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,64,128,1,fp8,fp8,0,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.11815466483434041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.1114453375339508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,float16,0,0.07877866427103679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,float16,0,0.07932266592979431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.11916800340016682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,fp8,0,0.07961600025494893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.11989333232243855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,64,128,1,fp8,fp8,0,0.07443200051784515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.11946666240692139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.11120532949765523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.11839999755223592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,64,128,1,fp8,fp8,0,0.0759626676638921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,float16,0,0.0791733314593633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.11959999799728394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.08866133292516072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,fp8,0,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,64,128,1,fp8,fp8,0,0.059792002042134605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,fp8,0,0.08053866525491078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.08483733733495076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,64,128,1,fp8,fp8,0,0.07849599917729695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.08844799796740214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,fp8,0,0.0621066689491272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.120688001314799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.11446932951609294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,float16,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.08892266949017842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,float16,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,64,128,1,fp8,fp8,0,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.08898666501045227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.08476266264915466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,float16,0,0.06085866689682007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.0885599950949351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,fp8,0,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,64,128,1,fp8,fp8,0,0.05979733169078827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.08753599723180135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,64,128,1,fp8,fp8,0,0.059343998630841575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.08391466736793518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,float16,0,0.06198399762312571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,fp8,0,0.0622026671965917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.08871466914812724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.08449600140253703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,float16,0,0.06028800209363302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.08418132861455281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,fp8,0,0.061994666854540505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,64,128,1,fp8,fp8,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.08872532844543457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,float16,0,1.3076693216959636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,fp8,0,1.31441068649292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,64,128,1,fp8,fp8,0,1.2031253178914387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,1.7369653383890789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,1.7497706413269043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,1.6158080101013184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,float16,0,1.3276586532592773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,1.7677706082661946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,fp8,0,1.334122657775879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,64,128,1,fp8,fp8,0,1.220357338587443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,1.7650666236877441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,float16,0,1.3427200317382812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,1.6326826413472493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,1.7746346791585286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,fp8,0,1.3483039538065593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,64,128,1,fp8,fp8,0,1.231061299641927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,1.7838293711344402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,1.6464533805847168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,float16,0,1.3705013593037922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,fp8,0,1.375285307566325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,1.8067679405212402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,64,128,1,fp8,fp8,0,1.262933333714803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,float16,0,0.7583999633789062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,0.9896746476491293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,fp8,0,0.7690239747365316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,1.8131945927937825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,1.6745492617289226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,64,128,1,fp8,fp8,0,0.7266293366750082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,float16,0,0.6587786674499512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,0.999071995417277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,0.8799146811167399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,0.9361653327941895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,fp8,0,0.6639413436253866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,64,128,1,fp8,fp8,0,0.6110933224360148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,0.8825493653615316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,0.8208479881286621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,float16,0,0.6651946703592936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,64,128,1,fp8,fp8,0,0.6175146500269572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,0.8870453039805094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,fp8,0,0.6726880073547363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,0.8907413482666016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,0.8303893407185873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,float16,0,0.6703893343607584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,fp8,0,0.6788106759389242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,0.8925279776255289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,64,128,1,fp8,fp8,0,0.6247733434041342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,0.9010826746622721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,0.8364373048146566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,float16,0,0.6858399709065756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,fp8,0,0.6924426555633545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,0.9096799691518148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,0.9128320217132568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,64,128,1,fp8,fp8,0,0.6413973172505697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,float16,0,0.3905653158823649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,64,128,1,fp8,fp8,0,0.37589867909749347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,0.8483466307322184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,0.5103466510772705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,fp8,0,0.3967519998550415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,0.45203733444213867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,fp8,0,0.3408426841100057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,0.5179093281428019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,0.4846239884694417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,float16,0,0.340773344039917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,0.42691198984781903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,float16,0,0.33879466851552326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,64,128,1,fp8,fp8,0,0.3181813359260559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,0.45687464872996014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,0.4548373222351074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,fp8,0,0.34457067648569745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,64,128,1,fp8,fp8,0,0.32225600878397626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,0.4580853382746379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,0.43037867546081543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,float16,0,0.3471733331680298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,fp8,0,0.3492159843444824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,0.4614773193995158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,64,128,1,fp8,fp8,0,0.32683199644088745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,0.464629332224528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,0.435477336247762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,float16,0,0.35411731402079266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,0.4677813450495402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,fp8,0,0.3581226666768392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,64,128,1,fp8,fp8,0,0.3339253266652425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,0.4726986487706502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,64,128,1,fp8,fp8,0,0.20265066623687744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,float16,0,0.2072640061378479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,0.44230401515960693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,0.27081600824991864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,fp8,0,0.21145600080490112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.27485867341359455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,0.2605066696802775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,float16,0,0.17691200971603394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.2363626758257548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,fp8,0,0.17907732725143433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,64,128,1,fp8,fp8,0,0.17147733767827353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.24013332525889078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.228767991065979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,float16,0,0.1771786610285441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.2376586596171061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,fp8,0,0.17940266927083334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,64,128,1,fp8,fp8,0,0.1728960076967875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.24043200413386026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.2299893299738566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,float16,0,0.18030399084091187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.24034667015075684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,fp8,0,0.1813546617825826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,64,128,1,fp8,fp8,0,0.1766773263613383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.24249066909154257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.23296533028284708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,float16,0,0.1848426659901937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.2480213244756063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,fp8,0,0.18750399351119995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,64,128,1,fp8,fp8,0,0.1820639967918396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.2493120034535726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.23773332436879477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,float16,0,0.1144586702187856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.14968533317248026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,fp8,0,0.11745066444079082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,64,128,1,fp8,fp8,0,0.11353600025177002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.151418666044871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.1469119985898336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.1255466639995575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,float16,0,0.09876799583435059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.1336373289426168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,fp8,0,0.10078400373458862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,64,128,1,fp8,fp8,0,0.09267733494440715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.13564266761144003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,float16,0,0.09949866930643718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.13404800494511923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,fp8,0,0.101200004418691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,64,128,1,fp8,fp8,0,0.09361599882443745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,64,128,1,fp8,fp8,0,0.09512000282605489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.1346773306528727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.12738666931788126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,float16,0,0.10155199964841206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,float16,0,0.10044800241788228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.13565333684285483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,fp8,0,0.10229866703351338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.13664000233014426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.12755200266838074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.13618133465449014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.08756800492604573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,fp8,0,0.10289067029953003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,64,128,1,fp8,fp8,0,0.09875733653704326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.13844799995422363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.12949333588282266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.08474666873613994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,float16,0,0.06778133412202199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.07936533292134602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,64,128,1,fp8,fp8,0,0.06764799853165944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.09027733405431111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,float16,0,0.06215466558933258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,64,128,1,fp8,fp8,0,0.059877331058184304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.08451732993125916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,float16,0,0.06306666632493337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.08463467160860698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,fp8,0,0.06406400104363759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,64,128,1,fp8,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.08469866712888081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.07983999947706859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.08073066671689351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,float16,0,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,64,128,1,fp8,fp8,0,0.06001600126425425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.08608000477155049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,float16,0,0.0642133355140686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,fp8,0,0.0639519989490509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,64,128,1,fp8,fp8,0,0.06217066446940104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.08673600355784099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.08051733175913493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,float16,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.06604800124963124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,fp8,0,0.053344001372655235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,64,128,1,fp8,fp8,0,0.05046399931112925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,64,128,1,fp8,fp8,0,0.051642666260401406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.06463466584682465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.06203199923038483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,float16,0,0.05294933418432871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.066021333138148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.062218666076660156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.062181333700815834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,float16,0,0.05183466772238413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.0662773350874583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,fp8,0,0.05189866820971171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,64,128,1,fp8,fp8,0,0.050479998191197716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,float16,0,0.052442664901415505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,fp8,0,0.051738664507865906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,64,128,1,fp8,fp8,0,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.062224000692367554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,float16,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,fp8,0,0.052815998593966164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,64,128,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.06198399762312571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,float16,0,1.5663679440816243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,fp8,0,1.5629173914591472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,1.8429333368937175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,64,128,1,fp8,fp8,0,1.5397334098815918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,1.8342773119608562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,float16,0,1.5761173566182454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,1.8007253011067708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,1.8505493799845378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,fp8,0,1.5687306722005208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,64,128,1,fp8,fp8,0,1.5762826601664226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,1.838271935780843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,float16,0,1.5839734077453613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,1.840928077697754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,fp8,0,1.5789440472920735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,1.8607412974039714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,64,128,1,fp8,fp8,0,1.595253308614095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,1.8564480145772297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,1.87116273244222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,float16,0,1.6759732564290364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,fp8,0,1.634559949239095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,1.923807938893636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,64,128,1,fp8,fp8,0,1.6681119600931804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,float16,0,0.8528266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,1.9126292864481609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,1.0005226929982503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,fp8,0,0.8361866474151611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,1.940224011739095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,64,128,1,fp8,fp8,0,0.8675306638081869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,0.9825440247853597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,float16,0,0.7938613096872965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,1.0087306499481201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,0.9307359854380289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,fp8,0,0.7907520135243734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,64,128,1,fp8,fp8,0,0.7453440030415853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,0.8811840216318766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,0.928266684214274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,float16,0,0.7955199877421061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,0.936346689860026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,fp8,0,0.7949173450469971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,64,128,1,fp8,fp8,0,0.7627893288930258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,0.932522694269816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,0.8984906673431396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,float16,0,0.7999680042266846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,0.9405866463979086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,fp8,0,0.7999786535898844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,64,128,1,fp8,fp8,0,0.7682133515675863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,0.9382773240407308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,0.8990186850229899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,float16,0,0.8184213638305664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,0.9604746500651041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,fp8,0,0.8138026396433512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,64,128,1,fp8,fp8,0,0.8274079958597819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,float16,0,0.4336479902267456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,0.9447946548461914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,64,128,1,fp8,fp8,0,0.4363573392232259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,0.5114186604817709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,0.9677920341491699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,fp8,0,0.4267466862996419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,0.5017226537068685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,fp8,0,0.4041920105616252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,64,128,1,fp8,fp8,0,0.38116268316904706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,0.5071946779886881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,float16,0,0.4045333464940389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,0.479039986928304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,0.4767520030339559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,0.4509546756744385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,float16,0,0.4055946667989095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,0.47787201404571533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,fp8,0,0.405509352684021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,64,128,1,fp8,fp8,0,0.3907359838485718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,0.4769226710001628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,0.4593013525009155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,float16,0,0.41047998269399005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,0.48134398460388184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,0.4835573434829712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,fp8,0,0.40829865137736004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,64,128,1,fp8,fp8,0,0.3899199962615967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,0.4606826702753703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,float16,0,0.4134666522343953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,0.4861439863840739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,fp8,0,0.41313600540161133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,64,128,1,fp8,fp8,0,0.4012426535288493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,0.26743467648824054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,0.48687998453776044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,float16,0,0.22574933369954428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,0.47118401527404785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,0.262992004553477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,fp8,0,0.22220800320307413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,64,128,1,fp8,fp8,0,0.22605866193771362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,0.26268800099690753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,float16,0,0.20997333526611328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.24798399209976196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,fp8,0,0.21043733755747476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,float16,0,0.20945600668589273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,64,128,1,fp8,fp8,0,0.19849065939585367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.24756266673405966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.23668267329533896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.24895467360814413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,fp8,0,0.21011734008789062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,64,128,1,fp8,fp8,0,0.20202134052912393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.25144000848134357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.2490773399670919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,64,128,1,fp8,fp8,0,0.20190399885177612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.23708266019821167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,float16,0,0.21202133099238077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,fp8,0,0.21273066600163779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.25034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.24016000827153525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,float16,0,0.21530133485794067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.2541653315226237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,fp8,0,0.2136639952659607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,64,128,1,fp8,fp8,0,0.2072746753692627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.2531680067380269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,float16,0,0.12146133184432983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.24361066023508707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.1439413328965505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,fp8,0,0.11967999736467998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,64,128,1,fp8,fp8,0,0.12427199880282085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.1420799990495046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.14510933558146158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,float16,0,0.11123733719189961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.1304693321386973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,fp8,0,0.10969066619873047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,64,128,1,fp8,fp8,0,0.10360532999038696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,64,128,1,fp8,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.13144000371297201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.1253706713517507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,float16,0,0.11122666796048482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.1316266655921936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,fp8,0,0.11106133460998535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.13152000308036804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.12549333771069845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,float16,0,0.11081066727638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.1325440009435018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,fp8,0,0.11174399654070537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,64,128,1,fp8,fp8,0,0.10807466506958008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.13210666179656982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.12893866499265036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,float16,0,0.11333333452542622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.13525866468747458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,fp8,0,0.11338133613268535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,64,128,1,fp8,fp8,0,0.1113813320795695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.13362666964530945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.13199466466903687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,float16,0,0.06800533334414165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,float16,0,0.06571199993292491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.08237866560618083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,fp8,0,0.06493333478768666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,fp8,0,0.0682773341735204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,64,128,1,fp8,fp8,0,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.08070399860541026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,float16,0,0.066170667608579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.08268266419569652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,64,128,1,fp8,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.0746613343556722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.07823466757933299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.0796800007422765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,fp8,0,0.06623466809590657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.07459199925263722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,fp8,0,0.06610666712125142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,64,128,1,fp8,fp8,0,0.06400533517201741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.07874666651089986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,float16,0,0.06583466629187266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,64,128,1,fp8,fp8,0,0.06328000128269196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.07867200175921123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.07644266883532207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,float16,0,0.06628266473611195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,fp8,0,0.06499733527501424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,64,128,1,fp8,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,64,128,1,fp8,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.0776800016562144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,float16,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.05156266689300537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,float16,0,0.040720000863075256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,64,128,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,float16,0,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.04984533290068308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,64,128,1,fp8,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.05028266708056132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,float16,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,64,128,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,64,128,1,fp8,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.04786666731039683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,float16,0,0.04111466556787491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,64,128,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,float16,0,0.02664533257484436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,64,128,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,64,128,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,float16,0,0.02569599946339925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,64,128,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.032144000132878624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,float16,0,1.5098826090494792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,1.526250680287679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,fp8,0,1.509936014811198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,64,128,1,fp8,fp8,0,1.4774452845255535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,1.5229973793029785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,1.5034826596577961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,float16,0,1.5182560284932454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,1.5330026944478352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,fp8,0,1.513055960337321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,64,128,1,fp8,fp8,0,1.5252265930175781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,1.5274772644042969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,1.545082728068034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,float16,0,1.5300854047139485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,1.5412425994873047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,fp8,0,1.5228053728739421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,64,128,1,fp8,fp8,0,1.5319573084513347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,1.5408053398132324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,1.539413293202718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,float16,0,1.5998400052388508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,1.6104213396708171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,fp8,0,1.563536008199056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,64,128,1,fp8,fp8,0,1.6352426211039226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,float16,0,0.824293295542399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,1.5825014114379883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,0.8361333211263021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,1.6467199325561523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,fp8,0,0.8112426598866781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,64,128,1,fp8,fp8,0,0.8207253615061442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,0.8255733648935953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,0.8410186767578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,float16,0,0.7637706597646078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,0.7707733313242594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,fp8,0,0.7645866870880127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,64,128,1,fp8,fp8,0,0.7204373677571615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,0.7683626810709635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,0.7292586962381998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,float16,0,0.7710080146789551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,0.7738986810048422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,fp8,0,0.7651893297831217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,64,128,1,fp8,fp8,0,0.7382079760233561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,0.7724640369415283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,0.7510879834493002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,float16,0,0.7725813388824463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,0.781440019607544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,fp8,0,0.7740693092346191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,64,128,1,fp8,fp8,0,0.7398613293965658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,0.7760960261027018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,0.7481760183970133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,float16,0,0.7834719816843668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,0.7996906439463297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,fp8,0,0.7778773307800293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,64,128,1,fp8,fp8,0,0.8105599880218506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,float16,0,0.42178134123484295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,0.7873546282450358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,0.4264479875564575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,0.8136959870656332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,fp8,0,0.4150133530298869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,64,128,1,fp8,fp8,0,0.4206453164418538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,0.42084801197052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,0.4288640022277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,float16,0,0.3911893367767334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,0.39601067701975506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,fp8,0,0.39155733585357666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,64,128,1,fp8,fp8,0,0.3694719870885213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,0.39584000905354816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,0.3743199904759725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,float16,0,0.3930399815241496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,0.39603734016418457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,0.39611732959747314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,fp8,0,0.3914506832758586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,0.4002559979756673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,64,128,1,fp8,fp8,0,0.37748265266418457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,0.38127466042836505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,float16,0,0.3977546691894531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,float16,0,0.401472012201945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,0.3819146553675334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,fp8,0,0.39428265889485675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,64,128,1,fp8,fp8,0,0.3760480086008708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,0.4005599816640218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,0.40593600273132324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,fp8,0,0.39933331807454425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,64,128,1,fp8,fp8,0,0.3920746644337972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,0.40513066450754803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,0.3948213259379069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,0.22028799851735434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,float16,0,0.21967466672261557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,0.22284799814224243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,fp8,0,0.21626667181650797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,64,128,1,fp8,fp8,0,0.2209440072377523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,0.22405866781870523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,float16,0,0.20409067471822104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.20546134312947592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,float16,0,0.20325867335001627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,fp8,0,0.20356800158818564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,64,128,1,fp8,fp8,0,0.19274133443832397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.20508267482121786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.19554666678110758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.20528000593185425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,fp8,0,0.2034506599108378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,64,128,1,fp8,fp8,0,0.19386667013168335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.20467732350031534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.19704532623291016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,float16,0,0.20599999030431113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.20775999625523886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,fp8,0,0.20543466011683145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,64,128,1,fp8,fp8,0,0.19604265689849854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.20756800969441733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.1986773411432902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,float16,0,0.20705600579579672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,float16,0,0.11761599779129028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.21102933088938394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,fp8,0,0.20811200141906738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,64,128,1,fp8,fp8,0,0.2013173302014669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.20954134066899618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.2035413384437561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.11930666367212932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,fp8,0,0.11525866389274597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,64,128,1,fp8,fp8,0,0.11964799960454305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.11870400110880534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,float16,0,0.10734933614730835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,float16,0,0.10734400153160095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.10932266712188721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,fp8,0,0.10868799686431885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,64,128,1,fp8,fp8,0,0.10259733597437541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.10934399565060933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.10460799932479858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.10870933532714844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,fp8,0,0.10737599929173787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,64,128,1,fp8,fp8,0,0.10263466835021973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.10922132929166158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.10500799616177876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,float16,0,0.10909333825111389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.11045333743095398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.11158399780591328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,fp8,0,0.1085653305053711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,64,128,1,fp8,fp8,0,0.10591999689737956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.10938666264216106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.10700800021489461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,float16,0,0.11078400413195293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,fp8,0,0.11011200149854024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,64,128,1,fp8,fp8,0,0.1076746682325999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.11223466197649638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.11036800344785054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.06951466699441274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,float16,0,0.06735466420650482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.06705600023269653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,fp8,0,0.06598933537801106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,64,128,1,fp8,fp8,0,0.0695253312587738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.06809600194295247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,float16,0,0.06431999802589417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.06429333488146464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,64,128,1,fp8,fp8,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.06217066446940104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,float16,0,0.06434666613737743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,fp8,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,64,128,1,fp8,fp8,0,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,fp8,0,0.06446399788061778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,float16,0,0.0642986645301183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.06465066472689311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.06419200201829274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,64,128,1,fp8,fp8,0,0.06228266656398773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.06409599880377452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.06331199904282887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,float16,0,0.06404266754786174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,64,128,1,fp8,fp8,0,0.0614879975716273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.06300266583760579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,float16,0,0.04081599911053976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.042581334710121155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,64,128,1,fp8,fp8,0,0.04037333279848099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.041696002086003624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,float16,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,fp8,0,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,64,128,1,fp8,fp8,0,0.03853866706291834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,float16,0,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,fp8,0,0.04042666653792063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,64,128,1,fp8,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,64,128,1,fp8,fp8,0,0.03889599939187368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,float16,0,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.040250666439533234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,float16,0,0.04055466751257578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,fp8,0,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,64,128,1,fp8,fp8,0,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,float16,0,0.02647999922434489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,float16,0,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.02769600103298823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,64,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.02672533442576726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,float16,0,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.025754667818546295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,fp8,0,0.028890666862328846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,float16,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,fp8,0,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,64,128,1,fp8,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,fp8,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,64,128,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,float16,0,0.7152746518452963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,0.7010613282521566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,fp8,0,0.7085279623667399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,64,128,1,fp8,fp8,0,0.6636533339818319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,0.6977653503417969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,0.6534080108006796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,float16,0,0.7112212975819906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,0.6981759866078695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,fp8,0,0.70797332127889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,64,128,1,fp8,fp8,0,0.6837866306304932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,0.6960373719533285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,0.6729333400726318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,float16,0,0.7172640164693197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,0.7009546756744385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,fp8,0,0.7139146327972412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,64,128,1,fp8,fp8,0,0.6878560384114584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,0.69705597559611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,0.6735626856486002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,float16,0,0.7298986911773682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,fp8,0,0.7253440221150717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,0.7161973317464193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,64,128,1,fp8,fp8,0,0.7478079795837402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,float16,0,0.3908853530883789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,0.7112747033437093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,0.3838293155034383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,0.7374880313873291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,0.3885759909947713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,fp8,0,0.38475199540456134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,64,128,1,fp8,fp8,0,0.3949973185857137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,0.3786453406016032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,float16,0,0.36327465375264484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.3563733498255412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,fp8,0,0.3630346854527791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,64,128,1,fp8,fp8,0,0.33904532591501874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.3564693530400594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.3330186605453491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,float16,0,0.36342934767405194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,0.3569120168685913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,fp8,0,0.3613599936167399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,64,128,1,fp8,fp8,0,0.34804801146189374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,0.3534719944000244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.34113065401713055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,float16,0,0.36753066380818683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,0.3598293463389079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,fp8,0,0.3650346597035726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,float16,0,0.3717706600824992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,64,128,1,fp8,fp8,0,0.35304001967112225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,fp8,0,0.3686613241831462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,0.3563679854075114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,0.3426400025685628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,0.35708800951639813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,0.36363200346628827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,64,128,1,fp8,fp8,0,0.3619306484858195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,0.362554669380188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,float16,0,0.20293867588043213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,0.1990506649017334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,fp8,0,0.19914666811625162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,64,128,1,fp8,fp8,0,0.20566932360331217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,0.19677333037058511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.20282133420308432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,float16,0,0.1886133352915446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.18440000216166177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,fp8,0,0.18724799156188965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,64,128,1,fp8,fp8,0,0.1785866618156433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.18285866578420004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.17287466923395792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,float16,0,0.18689600626627603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.1838080088297526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,fp8,0,0.1858560045560201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,64,128,1,fp8,fp8,0,0.18039999405543009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.1827359994252523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.17668799559275308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,float16,0,0.18893865744272867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.1848426659901937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,fp8,0,0.18901334206263223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,64,128,1,fp8,fp8,0,0.1824373404184977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.18470933039983115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.17795199155807495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,float16,0,0.19202667474746704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.18710400660832724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,fp8,0,0.191103994846344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,fp8,0,0.1050986647605896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,64,128,1,fp8,fp8,0,0.18613332509994507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.18752533197402954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,64,128,1,fp8,fp8,0,0.11173866192499797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.18107734123865762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,float16,0,0.10731732845306396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.1067039966583252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.10345600048700969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,64,128,1,fp8,fp8,0,0.09293333689371745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.10961066683133443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,float16,0,0.09891733527183533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.09691199660301208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.0951039989789327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,fp8,0,0.09806399544080098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.09526399771372478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.09066133697827657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,float16,0,0.09876267115275066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,fp8,0,0.09880000352859497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.09669867157936096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,64,128,1,fp8,fp8,0,0.09360000491142273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,64,128,1,fp8,fp8,0,0.0960586667060852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.0927946666876475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,float16,0,0.09888533751169841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.09874666730562846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,fp8,0,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.09329600135485332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,float16,0,0.10087466239929199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,float16,0,0.06252266466617584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,fp8,0,0.1009173293908437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,64,128,1,fp8,fp8,0,0.09909333785374959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.0988159974416097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.09704533219337463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,float16,0,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,fp8,0,0.06340266764163971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,64,128,1,fp8,fp8,0,0.06566933294137318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.06526400148868561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.05779199798901876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,fp8,0,0.05995733539263407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,fp8,0,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,64,128,1,fp8,fp8,0,0.05779733260472616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.05598933498064677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,float16,0,0.060005332032839455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.057722667853037514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,64,128,1,fp8,fp8,0,0.05778666834036509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,64,128,1,fp8,fp8,0,0.05719466507434845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.05806933343410492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,float16,0,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.058101331194241844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,fp8,0,0.059845333298047386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,64,128,1,fp8,fp8,0,0.05855466425418854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.05799466868241628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.05754133562246958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,float16,0,0.059343998630841575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,float16,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.0367999995748202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,64,128,1,fp8,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.035904000202814736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,64,128,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,64,128,1,fp8,fp8,0,0.036346666514873505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,64,128,1,fp8,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.03640000025431315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,float16,0,0.03659733384847641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.036720000207424164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,64,128,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,64,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,64,128,1,fp8,fp8,0,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,float16,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,64,128,1,fp8,fp8,0,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,fp8,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,64,128,1,fp8,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,64,128,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,float16,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,64,128,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,64,128,1,fp8,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,64,128,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,float16,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,float16,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,float16,0,0.3866986831029256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.3866080045700073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,fp8,0,0.38340266545613605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,64,128,1,fp8,fp8,0,0.3653493324915568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.38326934973398846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.36663464705149335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,float16,0,0.3840159972508748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,0.38198399543762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,fp8,0,0.38070933024088544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,64,128,1,fp8,fp8,0,0.37382400035858154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,0.38068799177805585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.371509313583374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,float16,0,0.38607998689015705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,0.3841013511021932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,fp8,0,0.38356268405914307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,64,128,1,fp8,fp8,0,0.37034134070078534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,0.38537601629892987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.3738666772842407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,float16,0,0.39459200700124103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,0.39476267496744794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,fp8,0,0.39161598682403564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,0.38469334443410236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,64,128,1,fp8,fp8,0,0.38338132699330646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,0.3903466860453288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,float16,0,0.21465067068735758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,0.21382933855056763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,fp8,0,0.2090453306833903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,64,128,1,fp8,fp8,0,0.21612799167633057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,0.21199999252955118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.21557333072026572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,float16,0,0.1990293264389038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.19905600945154825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,fp8,0,0.19952533642450967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,64,128,1,fp8,fp8,0,0.1888373295466105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.1995733380317688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.18793066342671713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,float16,0,0.19645865758260092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.19754133621851602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,fp8,0,0.19698133071263632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,64,128,1,fp8,fp8,0,0.19420800606409708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.19981332619984946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.19675199190775552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,64,128,1,fp8,fp8,0,0.1920373241106669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.19113065799077353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.19301333030064902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,float16,0,0.19925334056218466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,fp8,0,0.19945067167282104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.19815999269485474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.20118399461110434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,float16,0,0.20396800835927328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.20329066117604574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,fp8,0,0.20015466213226318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,64,128,1,fp8,fp8,0,0.1970026691754659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.19849600394566855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,float16,0,0.1135040024916331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,float16,0,0.10391466816266377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.11231999595959981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,fp8,0,0.1120853324731191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,64,128,1,fp8,fp8,0,0.1156213382879893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.11213866869608562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.11582932869593303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.10567999879519145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,fp8,0,0.10416533549626668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,64,128,1,fp8,fp8,0,0.09873066345850627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.10362133383750916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.09866666793823242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,float16,0,0.1049066682656606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.10533333818117778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.10520533720652263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,fp8,0,0.10459733009338379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,64,128,1,fp8,fp8,0,0.10071466366449992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.1039573351542155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.09966933727264404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,float16,0,0.10455999771753947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,fp8,0,0.10492266217867534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,64,128,1,fp8,fp8,0,0.10259733597437541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.10599467158317566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,float16,0,0.10778133074442546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,float16,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.10693333546320598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,fp8,0,0.10691199700037639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,64,128,1,fp8,fp8,0,0.10563199718793233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.10634133219718933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.10564800103505452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,float16,0,0.06075199941794077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.0622026671965917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,fp8,0,0.06192000210285187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,64,128,1,fp8,fp8,0,0.06597866614659627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.06344000001748402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,float16,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.060362666845321655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,fp8,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,fp8,0,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,64,128,1,fp8,fp8,0,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,float16,0,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,64,128,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.05772800246874491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.060271998246510826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.05986666679382324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,64,128,1,fp8,fp8,0,0.05958933134873708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.060880000392595925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.059077332417170204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,float16,0,0.05977599819501241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,fp8,0,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,64,128,1,fp8,fp8,0,0.059706668059031166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.05985066791375478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.058575997749964394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,float16,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,fp8,0,0.03917866696914037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,64,128,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,float16,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,64,128,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.03801066676775614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,float16,0,0.038704000413417816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.037871999045213066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,64,128,1,fp8,fp8,0,0.037130666275819145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,64,128,1,fp8,fp8,0,0.03658133248488108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.03723733375469843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,fp8,0,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,fp8,0,0.03921599934498469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,64,128,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,64,128,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,float16,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,64,128,1,fp8,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,64,128,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,64,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,64,128,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,64,128,1,fp8,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,64,128,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,float16,0,0.27740800380706787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.2772639989852905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,fp8,0,0.2766559918721517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,64,128,1,fp8,fp8,0,0.2590986688931783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.2754240036010742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.25969066222508747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,float16,0,0.276528000831604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.27730133136113483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,fp8,0,0.275221327940623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,64,128,1,fp8,fp8,0,0.26266666253407794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.27532800038655597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.2618933320045471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,64,128,1,fp8,fp8,0,0.2619946599006653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,float16,0,0.27669866879781085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.2773333390553792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,fp8,0,0.2762346665064494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.2773173252741496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.26372265815734863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,float16,0,0.2784213423728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.28010133902231854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,fp8,0,0.27772267659505206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,64,128,1,fp8,fp8,0,0.2671999931335449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.279530664285024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.26680533091227215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,float16,0,0.15195733308792114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.1520693302154541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,fp8,0,0.14975466330846152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,64,128,1,fp8,fp8,0,0.14964266618092856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.1499626636505127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.1507200002670288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.1421226660410563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,float16,0,0.1446346640586853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.1433013379573822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,fp8,0,0.1441386640071869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,64,128,1,fp8,fp8,0,0.13403200109799704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.13361600041389465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,float16,0,0.14300266901652017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.14391466975212097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,fp8,0,0.1418293317159017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,64,128,1,fp8,fp8,0,0.13572266697883606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.13435199856758118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,float16,0,0.1451573371887207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.14565866192181906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,fp8,0,0.1444533367951711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,64,128,1,fp8,fp8,0,0.13732799887657166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.14525333046913147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.13852266470591226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.14597333470980325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,float16,0,0.14733866850535074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.145797332127889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,fp8,0,0.1455839971701304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,64,128,1,fp8,fp8,0,0.14040000240008035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.13946666320165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,float16,0,0.08276266853014629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.08260799944400787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,fp8,0,0.08243733147780101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,64,128,1,fp8,fp8,0,0.08376000324885051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.08257066706816356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.08275199929873149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,float16,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.07843199868996938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,64,128,1,fp8,fp8,0,0.07486400008201599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.079434668024381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.07435200115044911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.07453866799672444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,float16,0,0.0787360022465388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,fp8,0,0.07840533554553986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,fp8,0,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,64,128,1,fp8,fp8,0,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.08054933448632558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,float16,0,0.08004799981911977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,float16,0,0.08061333497365315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.07929599781831105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,64,128,1,fp8,fp8,0,0.07594666878382365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.0801333338022232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.07613866527875264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.08051200211048126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,fp8,0,0.07826666533946991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,64,128,1,fp8,fp8,0,0.07633600135644276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,float16,0,0.047637333472569786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,float16,0,0.046666666865348816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,64,128,1,fp8,fp8,0,0.04700266818205515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.04529066880544027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.04726399978001913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.04619733492533366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.04700266818205515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,64,128,1,fp8,fp8,0,0.045381332437197365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.04669333497683207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,float16,0,0.046223998069763184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,fp8,0,0.047338664531707764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,64,128,1,fp8,fp8,0,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.04438399771849314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,float16,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,64,128,1,fp8,fp8,0,0.04458666841189066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,float16,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,64,128,1,fp8,fp8,0,0.04451199869314829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,float16,0,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,fp8,0,0.03057066599527995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,64,128,1,fp8,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.031125334401925404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,64,128,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,float16,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,64,128,1,fp8,fp8,0,0.02978666623433431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,fp8,0,0.030879999200503033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,64,128,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.031119999786218006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,float16,0,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,fp8,0,0.030261332790056866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,64,128,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.030250666042168934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,64,128,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,float16,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,64,128,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,64,128,1,fp8,fp8,0,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,float16,0,0.020693333198626835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,64,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.02053333322207133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,64,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,float16,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,float16,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,float16,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,64,128,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,fp8,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,64,128,1,fp8,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,64,128,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,float16,0,0.22193066279093424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.22182399034500122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,fp8,0,0.22164799769719443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,64,128,1,fp8,fp8,0,0.20457599560419717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.22241600354512533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.20533865690231323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,float16,0,0.2230773369471232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.22194133202234903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,fp8,0,0.223471999168396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,64,128,1,fp8,fp8,0,0.20533865690231323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.22111467520395914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.2243679960568746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.205567995707194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,float16,0,0.22251200675964355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,fp8,0,0.22275733947753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,64,128,1,fp8,fp8,0,0.20887466271718344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.22361600399017334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.22342399756113687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.2068906625111898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,float16,0,0.2244960069656372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,fp8,0,0.22340265909830728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,64,128,1,fp8,fp8,0,0.21033066511154175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.22472000122070312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.21040532986323038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,float16,0,0.12219199538230896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.1220266620318095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,fp8,0,0.12126933534940083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,64,128,1,fp8,fp8,0,0.11729066570599873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.12133866548538208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.11928000052769978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,float16,0,0.11901332934697469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.11929600437482198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,float16,0,0.11739200353622437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,fp8,0,0.11894399921099345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,64,128,1,fp8,fp8,0,0.11124266187349956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.11764267086982727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.11003200213114421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.1193386713663737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,float16,0,0.11727999647458394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.11756267150243123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,fp8,0,0.11831999818483989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,64,128,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.11730666955312093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.11126933495203654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,fp8,0,0.11895466844240825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,64,128,1,fp8,fp8,0,0.11105599999427795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.11724799871444702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.10998400052388509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,float16,0,0.11831466356913249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.1176533301671346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,fp8,0,0.11794666449228923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,64,128,1,fp8,fp8,0,0.11146666606267293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.11865599950154622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.11110933621724446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,float16,0,0.06666666766007741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,float16,0,0.0652213344971339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.06621866424878438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,64,128,1,fp8,fp8,0,0.061946665247281395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,64,128,1,fp8,fp8,0,0.06472533444563548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.06669333577156067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.06574399769306183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,fp8,0,0.06632533172766368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.06629866858323415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,float16,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.06613333523273468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,64,128,1,fp8,fp8,0,0.06295466423034668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,64,128,1,fp8,fp8,0,0.062405332922935486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.0661599983771642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.06614399949709575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,float16,0,0.06624533236026764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,float16,0,0.06646400193373363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,fp8,0,0.06610666712125142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,64,128,1,fp8,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,64,128,1,fp8,fp8,0,0.06294933458169301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.06611733138561249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.06257066627343495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,float16,0,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,fp8,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,float16,0,0.038032000263532005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,64,128,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.037045332292715706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,float16,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.03933866570393244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,64,128,1,fp8,fp8,0,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,float16,0,0.03876800090074539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.038719999293486275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,64,128,1,fp8,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,float16,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,64,128,1,fp8,fp8,0,0.038293334345022835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.038773333032925926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,float16,0,0.02611200014750163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,float16,0,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,float16,0,0.025770666698614757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,64,128,1,fp8,fp8,0,0.025968000292778015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,fp8,0,0.02611733227968216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,64,128,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,64,128,1,fp8,fp8,0,0.025722667574882507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,fp8,0,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,float16,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,float16,0,0.1962666710217794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.19612266620000204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,fp8,0,0.1948053240776062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,64,128,1,fp8,fp8,0,0.18280533949534097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.19471466541290283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.18131732940673828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,float16,0,0.19673067331314087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.19537067413330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,fp8,0,0.19566933314005533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,64,128,1,fp8,fp8,0,0.18277867635091147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.19534399112065634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.18291733662287393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,float16,0,0.19434134165445963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.195306658744812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,fp8,0,0.19535466035207114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.1954186757405599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,64,128,1,fp8,fp8,0,0.1828426718711853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.1953493356704712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.18285866578420004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,float16,0,0.1954560081164042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,fp8,0,0.19406932592391968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,64,128,1,fp8,fp8,0,0.1835306684176127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.19605867067972818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.18248534202575684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,float16,0,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.10063999891281128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.10487999518712361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,64,128,1,fp8,fp8,0,0.10017066200574239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.10522133111953735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,float16,0,0.10314666231473286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.10333866874376933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,fp8,0,0.1048906644185384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,64,128,1,fp8,fp8,0,0.09716266393661499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.10504000385602315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.09848533074061076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,float16,0,0.10493333141009013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.10332266489664714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,fp8,0,0.10499200224876404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,64,128,1,fp8,fp8,0,0.09804266691207886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.10309333602587382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.09742933511734009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,float16,0,0.10299733281135559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.10326932867368062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,fp8,0,0.10326400399208069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,64,128,1,fp8,fp8,0,0.09890133142471313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.10366933544476827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.09895466764767964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,float16,0,0.1032319962978363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.1032426655292511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,fp8,0,0.10318932930628459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,64,128,1,fp8,fp8,0,0.09878399968147278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.09782399733861287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,float16,0,0.05985066791375478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.057855998476346336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,fp8,0,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,64,128,1,fp8,fp8,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.05955733358860016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.057189335425694786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,float16,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,float16,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.05859733124574026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,fp8,0,0.057818666100502014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,64,128,1,fp8,fp8,0,0.05559466779232025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.055162668228149414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,fp8,0,0.058703998724619545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,64,128,1,fp8,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.05495999753475189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,float16,0,0.05899733304977417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.05793066819508871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,fp8,0,0.0591839998960495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,64,128,1,fp8,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,float16,0,0.05810666580994924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.05917333563168844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,64,128,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.03525333354870478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,64,128,1,fp8,fp8,0,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.05649599929650625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,float16,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,float16,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,float16,0,0.034671999514102936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,64,128,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,fp8,0,0.03521066655715307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,64,128,1,fp8,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,64,128,1,fp8,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,float16,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,fp8,0,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,64,128,1,fp8,fp8,0,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,float16,0,0.024959998826185863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,float16,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,float16,0,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,64,128,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,64,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,64,128,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,float16,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,float16,0,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,64,128,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,fp8,0,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,float16,0,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,fp8,0,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,64,128,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,float16,0,1.4367359479268391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,64,128,1,fp8,fp8,0,1.300544023513794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,fp8,0,1.4461973508199055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,float16,0,1.455301284790039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,fp8,0,1.4693867365519206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,64,128,1,fp8,fp8,0,1.3237600326538086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,float16,0,1.4690613746643066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,fp8,0,1.480581283569336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,8.638869603474935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,7.962474822998047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,8.662314732869467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,64,128,1,fp8,fp8,0,1.3388853073120117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,8.675674438476562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,8.000021616617838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,8.681781133015951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,float16,0,1.5047893524169922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,fp8,0,1.5194932619730632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,8.679818471272787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,64,128,1,fp8,fp8,0,1.3855306307474773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,float16,0,0.8443466822306315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,fp8,0,0.8625280062357584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,64,128,1,fp8,fp8,0,0.7982186476389567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,7.999647776285808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,8.684885025024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,4.531770706176758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,float16,0,0.7535893122355143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,8.745178858439127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,fp8,0,0.7600693702697754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,4.561013221740723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,4.188511848449707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,8.045514424641928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,64,128,1,fp8,fp8,0,0.6859306494394938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,8.754592259724935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,float16,0,0.7587839762369791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,fp8,0,0.7642079989115397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,64,128,1,fp8,fp8,0,0.6923840045928955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,float16,0,0.7640746434529623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,4.421962738037109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,4.068469365437825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,4.426805178324382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,fp8,0,0.7717173099517822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,64,128,1,fp8,fp8,0,0.7003200054168701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,4.424570719401042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,4.082784016927083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,4.42955207824707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,float16,0,0.7792533238728842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,fp8,0,0.7911466757456461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,4.427407900492351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,64,128,1,fp8,fp8,0,0.721343994140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,float16,0,0.4639146725336711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,fp8,0,0.47607465585072833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,64,128,1,fp8,fp8,0,0.4438933531443278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,4.092464129130046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,4.436869303385417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,2.3675999641418457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,4.450480143229167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,float16,0,0.4221706787745158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,fp8,0,0.42470399538675946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,4.10321585337321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,2.3837599754333496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,64,128,1,fp8,fp8,0,0.3884693384170532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,4.465322812398274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,2.1932427088419595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,float16,0,0.4232693513234456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,2.308037281036377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,fp8,0,0.426800012588501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,64,128,1,fp8,fp8,0,0.39325865109761554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,2.3179787000020347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,2.134672005971273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,float16,0,0.42720532417297363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,fp8,0,0.43157867590586346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,2.309471925099691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,64,128,1,fp8,fp8,0,0.3957759936650594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,2.3264800707499185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,2.1367200215657554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,float16,0,0.43483734130859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,2.3198986053466797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,fp8,0,0.43908798694610596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,64,128,1,fp8,fp8,0,0.405898650487264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,2.3231892585754395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,2.146970589955648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,float16,0,0.310805340607961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,fp8,0,0.31406400601069134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,2.3281973203023276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,64,128,1,fp8,fp8,0,0.2884320020675659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,float16,0,0.3129173318545024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,1.3223040103912354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,2.3329013188680015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,2.1540427207946777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,fp8,0,0.3099413315455119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,64,128,1,fp8,fp8,0,0.2913279930750529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,1.3230986595153809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,1.215498685836792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,1.3094773292541504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,float16,0,0.3090719978014628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,fp8,0,0.3125973343849182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,64,128,1,fp8,fp8,0,0.2885599931081136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,1.3130613168080647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,1.2124746640523274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,float16,0,0.3138773242632548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,1.3186879952748616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,fp8,0,0.31013866265614826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,64,128,1,fp8,fp8,0,0.2892586588859558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,1.310805320739746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,1.2140959898630779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,1.3151466846466064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,float16,0,0.30963732798894245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,fp8,0,0.3104533354441325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,1.3103946844736736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,64,128,1,fp8,fp8,0,0.2895680069923401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,1.2204533418019612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,1.3187092940012615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,1.316383997599284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,1.2182559967041016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,float16,0,1.074896017710368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,fp8,0,1.0814239978790283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,64,128,1,fp8,fp8,0,0.9686506589253744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,float16,0,1.082154671351115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,fp8,0,1.0923893451690674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,64,128,1,fp8,fp8,0,0.9836853345235189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,float16,0,1.0929120381673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,5.116357485453288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,4.7105865478515625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,5.123039881388347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,fp8,0,1.1036319732666016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,64,128,1,fp8,fp8,0,0.9974453449249268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,5.119322776794434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,5.12773863474528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,float16,0,1.1193013191223145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,4.716986656188965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,fp8,0,1.1302506923675537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,64,128,1,fp8,fp8,0,1.029909372329712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,5.127354621887207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,float16,0,0.6377333402633667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,fp8,0,0.6525226831436157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,64,128,1,fp8,fp8,0,0.60044264793396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,4.735370635986328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,5.153114636739095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,2.713189442952474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,float16,0,0.570032000541687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,5.173882802327474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,fp8,0,0.5743039846420288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,4.763978640238444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,64,128,1,fp8,fp8,0,0.5196479956309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,5.183717409769694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,2.7269598642985025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,2.507808049519857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,2.633983929951986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,float16,0,0.5738666852315267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,fp8,0,0.5785919825236002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,64,128,1,fp8,fp8,0,0.5235466559727987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,2.6376479466756186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,float16,0,0.5799680153528849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,2.4244373639424643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,fp8,0,0.5850346485773722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,64,128,1,fp8,fp8,0,0.5311093330383301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,2.637850602467855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,float16,0,0.5912266572316488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,2.6379572550455728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,2.435077349344889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,fp8,0,0.598741332689921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,2.6421173413594565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,64,128,1,fp8,fp8,0,0.5454773505528768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,2.6464106241861978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,float16,0,0.35258666674296063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,2.4372533162434897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,fp8,0,0.3617386817932129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,2.661077340443929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,64,128,1,fp8,fp8,0,0.3384213447570801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,1.4352213541666667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,float16,0,0.3193333347638448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,2.6649386088053384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,2.454282601674398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,fp8,0,0.3200213313102722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,1.4431254069010417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,64,128,1,fp8,fp8,0,0.2974826693534851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,1.3318506876627605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,1.3934346834818523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,float16,0,0.3204053243001302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,fp8,0,0.3240586717923482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,64,128,1,fp8,fp8,0,0.297925333182017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,1.392778714497884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,1.2908906936645508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,float16,0,0.32501333951950073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,1.3928532600402832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,fp8,0,0.32778133948644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,64,128,1,fp8,fp8,0,0.3036266764005025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,1.2908426920572917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,1.3987092971801758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,float16,0,0.3302080035209656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,1.3977972666422527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,fp8,0,0.33460799853007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,64,128,1,fp8,fp8,0,0.30988800525665283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,1.4058027267456055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,float16,0,0.2363146742184957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,1.294554630915324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,1.411237398783366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,fp8,0,0.2362346649169922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,64,128,1,fp8,fp8,0,0.21970667441685995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,0.82042129834493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,1.4123199780782063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,1.306997299194336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,float16,0,0.2341866691907247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,0.8226133187611898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,fp8,0,0.2321173350016276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,0.8169866402943929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,0.7569279670715332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,64,128,1,fp8,fp8,0,0.21754133701324463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,float16,0,0.23197867472966513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,0.8145813147226969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,fp8,0,0.23297067483266196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,0.7568533420562744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,64,128,1,fp8,fp8,0,0.2177706758181254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,0.8136800130208334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,float16,0,0.23602133989334106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,0.8147946993509928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,0.7545546690622965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,fp8,0,0.23406400283177695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,64,128,1,fp8,fp8,0,0.21926933526992798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,0.817733367284139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,float16,0,0.23635200659434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,0.8224000136057535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,0.7554986476898193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,fp8,0,0.23399466276168823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,64,128,1,fp8,fp8,0,0.21969600518544516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,0.8163253466288248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,0.8209226926167806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,0.7612266540527344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,float16,0,0.8933227062225342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,fp8,0,0.900719960530599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,64,128,1,fp8,fp8,0,0.8078986803690592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,float16,0,0.9005653063456217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,fp8,0,0.9079466660817465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,64,128,1,fp8,fp8,0,0.818399985631307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,3.6898934046427407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,float16,0,0.9086080392201742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,3.3912906646728516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,3.702656110127767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,3.695370674133301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,fp8,0,0.918405294418335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,64,128,1,fp8,fp8,0,0.8284213542938232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,3.6982558568318686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,3.4086828231811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,float16,0,0.9284266630808512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,fp8,0,0.9405866463979086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,64,128,1,fp8,fp8,0,0.8541759649912516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,3.7054131825764975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,float16,0,0.5308746496836344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,fp8,0,0.5430613358815511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,3.4191306432088218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,3.713653246561686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,64,128,1,fp8,fp8,0,0.5028426647186279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,1.9764053026835124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,3.7354507446289062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,float16,0,0.4724373420079549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,3.7516587575276694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,3.4386879603068032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,fp8,0,0.478005329767863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,64,128,1,fp8,fp8,0,0.4335413376490275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,1.9889814058939617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,1.8326773643493652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,1.9105067253112793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,float16,0,0.47620264689127606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,fp8,0,0.48157866795857746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,64,128,1,fp8,fp8,0,0.43883732954661053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,1.910912036895752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,1.7644906044006348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,float16,0,0.48230401674906415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,1.9096105893452961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,fp8,0,0.4867680072784424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,64,128,1,fp8,fp8,0,0.4431733290354411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,1.7633652687072754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,1.9184106190999348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,float16,0,0.49031468232472736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,1.9218559265136719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,fp8,0,0.498149315516154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,64,128,1,fp8,fp8,0,0.45530664920806885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,1.9243787129720051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,1.7688533465067546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,float16,0,0.29554132620493573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,1.934874693552653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,fp8,0,0.30170132716496784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,64,128,1,fp8,fp8,0,0.28308266401290894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,1.0568959712982178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,float16,0,0.26334933439890545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,1.9351733525594075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,1.7847572962443035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,1.0632906754811604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,fp8,0,0.2649386723836263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,1.019968032836914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,64,128,1,fp8,fp8,0,0.2472426692644755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,0.9835200309753418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,float16,0,0.2650826573371887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,1.0197066466013591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,fp8,0,0.26899200677871704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,0.9453386465708414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,64,128,1,fp8,fp8,0,0.25007466475168866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,1.020090659459432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,float16,0,0.27085334062576294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,fp8,0,0.2733173370361328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,1.0242400169372559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,0.9495519797007242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,float16,0,0.27594133218129474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,64,128,1,fp8,fp8,0,0.25311466058095294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,1.0246666272481282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,1.0300052960713704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,fp8,0,0.28066666920979816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,0.9515573183695475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,1.0315039952596028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,64,128,1,fp8,fp8,0,0.26019734144210815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,float16,0,0.20250133673350015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,fp8,0,0.20113599300384521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,1.0394240220387776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,64,128,1,fp8,fp8,0,0.18923733631769815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,0.6196266810099283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,0.9572959740956625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,float16,0,0.19923200209935507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,0.6149119933446249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,0.570799986521403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,fp8,0,0.19916266202926636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,64,128,1,fp8,fp8,0,0.1869866649309794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,0.6116693417231241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,0.6132853428522745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,float16,0,0.19913599888483682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,0.5653973420461019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,fp8,0,0.1972586711247762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,64,128,1,fp8,fp8,0,0.18483734130859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,0.6099679867426554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,0.6146933237711588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,64,128,1,fp8,fp8,0,0.18545067310333252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,float16,0,0.20039467016855875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,0.5697760184605917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,fp8,0,0.1973653237024943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,0.6109280188878378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,float16,0,0.20105600357055664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,0.6113226811091105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,0.5653440157572428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,fp8,0,0.2019360065460205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,0.6110933224360148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,0.5698399941126505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,0.61353067557017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,64,128,1,fp8,fp8,0,0.18916799624760947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,float16,0,1.3973973592122395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,fp8,0,1.4076107343037922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,64,128,1,fp8,fp8,0,1.2630346616109211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,float16,0,1.4220693906148274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,fp8,0,1.4297439257303874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,64,128,1,fp8,fp8,0,1.2855679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,4.901066780090332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,float16,0,1.4315039316813152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,4.496778806050618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,4.901285489400228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,4.924261411031087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,fp8,0,1.4440053304036458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,64,128,1,fp8,fp8,0,1.3024533589680989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,4.932511965433757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,4.529722531636556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,float16,0,1.470677375793457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,4.931642532348633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,fp8,0,1.484389305114746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,64,128,1,fp8,fp8,0,1.3472906748453777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,float16,0,0.8086346785227457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,4.534159978230794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,4.944208145141602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,fp8,0,0.8263146877288818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,64,128,1,fp8,fp8,0,0.760703961054484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,4.9934508005778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,2.6082560221354165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,5.007717450459798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,float16,0,0.716048002243042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,4.587125460306804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,fp8,0,0.7235626379648844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,2.6222559611002603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,64,128,1,fp8,fp8,0,0.6498133341471354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,2.4060746828715005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,float16,0,0.7232000033060709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,fp8,0,0.7295413017272949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,2.497935930887858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,64,128,1,fp8,fp8,0,0.656602660814921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,2.496607939402262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,2.2954986890157065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,float16,0,0.7286720275878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,2.4993759791056314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,fp8,0,0.7353813648223877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,64,128,1,fp8,fp8,0,0.6657119989395142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,2.5098719596862793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,2.305429299672445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,float16,0,0.7444159984588623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,2.5075839360555015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,fp8,0,0.755839983622233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,64,128,1,fp8,fp8,0,0.6853973070780436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,2.5207467079162598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,2.311824003855387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,float16,0,0.4266879955927531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,fp8,0,0.43834133942921955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,64,128,1,fp8,fp8,0,0.4053279956181844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,2.5304692586263022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,1.355738639831543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,float16,0,0.3816479841868083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,2.545802593231201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,2.3288960456848145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,1.3688799540201824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,fp8,0,0.3858506679534912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,1.2542133331298828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,1.3009173075358074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,64,128,1,fp8,fp8,0,0.35092798868815106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,float16,0,0.38475199540456134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,fp8,0,0.3879306713740031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,1.304154634475708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,64,128,1,fp8,fp8,0,0.35518932342529297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,1.2033600012461345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,1.3062453269958496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,float16,0,0.38891200224558514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,1.3087519804636638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,fp8,0,0.3933226664861043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,64,128,1,fp8,fp8,0,0.3596426645914714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,1.2049813270568848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,1.309333324432373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,float16,0,0.39738134543100995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,fp8,0,0.4020320177078247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,64,128,1,fp8,fp8,0,0.3676053285598755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,1.315013329188029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,1.210314671198527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,float16,0,0.24213866392771402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,1.3213386535644531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,fp8,0,0.24868265787760416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,64,128,1,fp8,fp8,0,0.2319413423538208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,0.7353813648223877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,1.3264906406402588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,1.216917355855306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,float16,0,0.21662400166193643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,0.7402453422546387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,0.6827359994252523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,fp8,0,0.21759466330210367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,float16,0,0.21727999051411948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,64,128,1,fp8,fp8,0,0.20350400606791177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,0.7008372942606608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,0.7043893337249756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,0.6520800193150839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,fp8,0,0.21916800737380981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,0.7063893477121989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,64,128,1,fp8,fp8,0,0.2052746613820394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,0.7047200202941895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,float16,0,0.21981332699457803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,0.6558613379796346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,fp8,0,0.22195732593536377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,64,128,1,fp8,fp8,0,0.2072533369064331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,0.6605919996897379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,0.7077493667602539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,float16,0,0.22603732347488403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,0.71124267578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,fp8,0,0.22934399048487344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,64,128,1,fp8,fp8,0,0.21339199940363565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,0.7134986718495687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,float16,0,0.1666826605796814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,0.7179146607716879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,0.6658026774724325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,0.43836267789204914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,fp8,0,0.16748799880345663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,0.440282662709554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,64,128,1,fp8,fp8,0,0.15631999572118124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,0.4038506746292114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,float16,0,0.16460800170898438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,fp8,0,0.1638826628526052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,0.43457067012786865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,64,128,1,fp8,fp8,0,0.1521440049012502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,0.4345173438390096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,float16,0,0.16358400384585062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,0.40507733821868896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,fp8,0,0.16270933548609415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,0.43691198031107586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,64,128,1,fp8,fp8,0,0.15212266643842062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,0.43294934431711835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,float16,0,0.16291200121243796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,0.4041706720987956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,fp8,0,0.16273066401481628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,0.4373386700948079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,64,128,1,fp8,fp8,0,0.15403200189272562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,0.43449600537618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,float16,0,0.16272000471750894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,0.40085331598917645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,fp8,0,0.16361600160598755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,0.434933344523112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,64,128,1,fp8,fp8,0,0.15432533621788025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,0.4386986494064331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,0.40486399332682294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,float16,0,1.0438666343688965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,fp8,0,1.0538400014241536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,64,128,1,fp8,fp8,0,0.9402346611022949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,float16,0,1.0548266569773357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,fp8,0,1.0647892951965332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,64,128,1,fp8,fp8,0,0.9573386510213217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,2.974389394124349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,2.728037198384603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,2.984560012817383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,2.985583941141764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,float16,0,1.06549866994222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,fp8,0,1.073573350906372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,64,128,1,fp8,fp8,0,0.9672373135884603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,3.0014241536458335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,2.741061210632324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,float16,0,1.0910613536834717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,3.004330635070801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,fp8,0,1.101253350575765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,64,128,1,fp8,fp8,0,1.004426638285319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,3.0117387771606445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,2.751978556315104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,float16,0,0.6107146739959717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,fp8,0,0.6248266696929932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,64,128,1,fp8,fp8,0,0.5726986726125082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,3.0313440958658853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,1.6102399826049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,3.042522748311361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,float16,0,0.541381319363912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,2.7953707377115884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,fp8,0,0.5462613503138224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,1.4894399642944336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,1.6205493609110515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,64,128,1,fp8,fp8,0,0.4912693500518799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,1.5272587140401204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,float16,0,0.5463466644287109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,fp8,0,0.552016019821167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,64,128,1,fp8,fp8,0,0.49696000417073566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,1.4065546989440918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,1.5303893089294434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,float16,0,0.5523573160171509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,1.5301599502563477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,fp8,0,0.5564533472061157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,64,128,1,fp8,fp8,0,0.50382399559021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,1.5400373140970867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,1.4111839930216472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,float16,0,0.5622026522954305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,1.5423893928527832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,fp8,0,0.5702613194783529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,64,128,1,fp8,fp8,0,0.5167626539866129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,1.4205652872721355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,1.5441279411315918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,float16,0,0.32604267199834186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,1.5526666641235352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,fp8,0,0.3325546582539876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,64,128,1,fp8,fp8,0,0.30987199147542316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,1.5634400049845378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,0.8486026922861735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,1.4295040766398113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,float16,0,0.2882346709569295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,0.8531733353932699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,0.7874133586883545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,fp8,0,0.2910719911257426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,0.8069173494974772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,64,128,1,fp8,fp8,0,0.2683573365211487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,0.8023680051167806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,float16,0,0.2909653385480245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,0.7433119614919027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,fp8,0,0.29360532760620117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,64,128,1,fp8,fp8,0,0.2699039975802104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,0.8048266569773356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,fp8,0,0.2995199958483378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,float16,0,0.29569600025812787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,0.8093012968699137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,0.7479306856791178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,64,128,1,fp8,fp8,0,0.2749706705411275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,0.8113280137379965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,float16,0,0.30187199513117474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,0.8160426616668701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,0.8208053112030029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,fp8,0,0.30595733722050983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,0.749941349029541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,64,128,1,fp8,fp8,0,0.28244266907374066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,0.8238933086395264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,float16,0,0.18334933121999106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,0.7583413124084473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,fp8,0,0.18886399269104004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,0.4676320155461629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,64,128,1,fp8,fp8,0,0.17787732680638632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,0.4721493323644002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,float16,0,0.16234667102495828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,0.43667733669281006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,fp8,0,0.1625173290570577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,0.4410453240076701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,64,128,1,fp8,fp8,0,0.15028267105420431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,0.44149335225423175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,0.44137601057688397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,float16,0,0.1623360017935435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,0.41016534964243573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,fp8,0,0.16321600476900736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,64,128,1,fp8,fp8,0,0.15195733308792114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,0.443178653717041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,float16,0,0.16437333822250366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,0.4111146529515584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,fp8,0,0.16679465770721436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,0.4458560148874919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,64,128,1,fp8,fp8,0,0.15745066603024802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,0.4460959831873576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,0.44920531908671063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,float16,0,0.16876266400019327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,0.4147893190383911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,fp8,0,0.17174933354059854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,64,128,1,fp8,fp8,0,0.1637173295021057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,float16,0,0.1234826644261678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,0.45282665888468426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,0.42056532700856525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.2882506648699443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,fp8,0,0.1236799955368042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,fp8,0,0.12165333827336629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,64,128,1,fp8,fp8,0,0.1174720029036204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,float16,0,0.12176533540089925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.28751999139785767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,0.26744532585144043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.28480533758799237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,64,128,1,fp8,fp8,0,0.11518399914105733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.28730666637420654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,float16,0,0.12340799967447917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,0.2630026737848918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,fp8,0,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.28614399830500287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,64,128,1,fp8,fp8,0,0.11550933122634888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.28499199946721393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,0.2643199960390727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.2858506639798482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,float16,0,0.12167466680208842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.28729067246119183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,fp8,0,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,fp8,0,0.12125333150227864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,64,128,1,fp8,fp8,0,0.11540266871452332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,64,128,1,fp8,fp8,0,0.11533866326014201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,0.26632533470789593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,float16,0,0.12171199917793274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.2847519914309184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.2874773343404134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,0.26497066020965576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,float16,0,1.378885269165039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,fp8,0,1.3907519976298015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,64,128,1,fp8,fp8,0,1.2435519695281982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,float16,0,1.4020959536234539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,2.9963839848836265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,fp8,0,1.4099626541137695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,64,128,1,fp8,fp8,0,1.2688426971435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,2.737701416015625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,3.0122559865315757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,3.0179465611775718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,float16,0,1.4127039909362793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,64,128,1,fp8,fp8,0,1.2803306579589844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,fp8,0,1.4240533510843914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,3.0321547190348306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,2.7701759338378906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,3.0329065322875977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,float16,0,1.4546133677164714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,3.0511627197265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,fp8,0,1.4650026957194011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,2.7808427810668945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,64,128,1,fp8,fp8,0,1.329642693201701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,float16,0,0.7931199868520101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,fp8,0,0.8064959843953451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,64,128,1,fp8,fp8,0,0.7437386512756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,3.0793867111206055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,1.6293066342671711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,3.094559987386068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,1.6424320538838704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,float16,0,0.6988480091094971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,2.82755184173584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,1.5061119397481282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,fp8,0,0.7041066487630209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,64,128,1,fp8,fp8,0,0.6320906480153402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,1.5202773412068684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,float16,0,0.7052640120188395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,1.527418613433838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,1.3948532740275066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,fp8,0,0.7107199827829996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,64,128,1,fp8,fp8,0,0.6384373505910238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,1.5266559918721516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,float16,0,0.7106666564941406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,1.4042506217956543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,fp8,0,0.7179360389709473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,1.5335040092468262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,64,128,1,fp8,fp8,0,0.645850658416748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,1.536245346069336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,float16,0,0.7287946542104086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,1.4103466669718425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,1.5433972676595051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,fp8,0,0.735146681467692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,64,128,1,fp8,fp8,0,0.6669173240661621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,1.5521705945332844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,float16,0,0.4121439854303996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,fp8,0,0.41918400923411053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,64,128,1,fp8,fp8,0,0.38790400822957355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,1.4310879707336426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,1.5646719932556152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,0.8432746728261312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,0.8505493005116781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,float16,0,0.3625653187433879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,0.7829813162485758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,fp8,0,0.3660213152567546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,64,128,1,fp8,fp8,0,0.3332693378130595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,0.790293296178182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,float16,0,0.3652586539586385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,0.7262240250905355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,0.7931466897328695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,fp8,0,0.36934399604797363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,64,128,1,fp8,fp8,0,0.33740798632303876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,0.7928373018900553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,float16,0,0.37145598729451496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,0.7974613507588705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,0.7319520314534506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,fp8,0,0.37547731399536133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,64,128,1,fp8,fp8,0,0.3405919869740804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,0.7968586285909017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,float16,0,0.3797333240509033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,0.8017120361328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,0.7368319829305013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,fp8,0,0.3841866652170817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,64,128,1,fp8,fp8,0,0.3491946856180827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,0.8078880310058594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,float16,0,0.22291733821233115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,64,128,1,fp8,fp8,0,0.21341866254806519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,0.81440536181132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,0.45713067054748535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,0.744325319925944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,fp8,0,0.2297226587931315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,0.4505759874979655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,float16,0,0.19443732500076294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,0.422762672106425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,fp8,0,0.1975253423055013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,0.4220106601715088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,64,128,1,fp8,fp8,0,0.18306134144465128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,0.4230080048243205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,0.3938613335291545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,float16,0,0.19618666172027588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,fp8,0,0.19745065768559775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,0.42320001125335693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,64,128,1,fp8,fp8,0,0.18487467368443808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,0.425434668858846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,0.4254133303960164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,0.39416531721750897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,float16,0,0.19920533895492554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,fp8,0,0.20080000162124634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,64,128,1,fp8,fp8,0,0.18810667594273886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,0.4281066656112671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,float16,0,0.20644799868265787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,0.39767467975616455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,0.43458131949106854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,fp8,0,0.21013333400090536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,0.4343680143356323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,64,128,1,fp8,fp8,0,0.1933493415514628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,float16,0,0.12982933719952902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,0.4041279951731364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,0.24244266748428345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.2548639973004659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,fp8,0,0.13302399714787802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,64,128,1,fp8,fp8,0,0.12667733430862427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.2592586676279704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.24032533168792725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,float16,0,0.11567999919255574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.24049067497253418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,fp8,0,0.11546132961908977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,64,128,1,fp8,fp8,0,0.10514666636784871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,0.22206934293111166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,float16,0,0.11550399661064148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,fp8,0,0.11548800269762675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.24091732501983643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,64,128,1,fp8,fp8,0,0.1069493293762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.24070932467778525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.2218453288078308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,float16,0,0.11552533507347107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.24182399113972983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,fp8,0,0.11685867110888164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,64,128,1,fp8,fp8,0,0.10771733522415161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.24352532625198364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.24355200926462808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.22363199790318808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,float16,0,0.11743999520937602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,fp8,0,0.1197653313477834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,64,128,1,fp8,fp8,0,0.11149866382280986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.24649600187937418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,64,128,1,fp8,fp8,0,0.08238400022188823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,0.22935465971628824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,float16,0,0.08868267138799031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,float16,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.1541973352432251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.164682666460673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,fp8,0,0.08879466851552327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.16437333822250366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.1539306640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.1646933356920878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,64,128,1,fp8,fp8,0,0.0825439989566803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.16551466782887778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,float16,0,0.08880533774693807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.1646453340848287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,fp8,0,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,64,128,1,fp8,fp8,0,0.08370666702588399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.16541332999865213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.15387200315793356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,float16,0,0.0885813335577647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.16529599825541177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,fp8,0,0.08850666880607605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,64,128,1,fp8,fp8,0,0.0827466646830241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.1639840006828308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.15429866313934326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,float16,0,0.0869599978129069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.16478400429089865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,fp8,0,0.08866133292516072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,64,128,1,fp8,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.15238933761914572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.16529066363970438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,float16,0,1.0306933720906575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,fp8,0,1.0394186973571777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,64,128,1,fp8,fp8,0,0.9288907051086426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,1.8941334088643391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,float16,0,1.040661334991455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,fp8,0,1.0502666632334392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,1.903663953145345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,1.7256693840026855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,64,128,1,fp8,fp8,0,0.943120002746582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,1.9024640719095867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,float16,0,1.0510719617207844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,1.91428804397583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,1.7429067293802898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,fp8,0,1.059226671854655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,64,128,1,fp8,fp8,0,0.9521653652191162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,1.9182666142781575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,float16,0,1.0770880381266277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,1.7510612805684407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,fp8,0,1.0859039624532063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,1.9262879689534504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,64,128,1,fp8,fp8,0,0.985098679860433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,1.9456853866577148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,float16,0,0.5962560176849365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,fp8,0,0.6076266765594482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,1.952917257944743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,64,128,1,fp8,fp8,0,0.5611733198165894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,1.0432693163553874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,1.7900586128234863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,float16,0,0.5264159838358561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,1.0594613552093506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,0.971893310546875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,fp8,0,0.5314720074335734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,0.9689280192057291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,float16,0,0.532474676767985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,64,128,1,fp8,fp8,0,0.47846933205922443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,0.9710400104522705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,0.8895359834035238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,fp8,0,0.5359359979629517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,64,128,1,fp8,fp8,0,0.4825386603673299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,0.9743893146514893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,0.9782400131225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,64,128,1,fp8,fp8,0,0.4896320104598999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,fp8,0,0.5420639912287394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,0.9829066594441732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,float16,0,0.5366346836090088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,0.9800746440887451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,0.8936159610748291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,float16,0,0.547653317451477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,0.9004586537679037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,fp8,0,0.5557440121968588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,0.9934293429056803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,64,128,1,fp8,fp8,0,0.5018560091654459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,float16,0,0.3131840030352275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,1.0008800029754639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,0.9136906464894613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,0.5463626782099406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,fp8,0,0.3205440044403076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,64,128,1,fp8,fp8,0,0.29639999071757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,0.5543306668599447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,0.5097173452377319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,float16,0,0.27568533023198444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,fp8,0,0.27727999289830524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,0.5083786646525065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,0.5060373147328695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,64,128,1,fp8,fp8,0,0.2547626694043477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,0.46983468532562256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,64,128,1,fp8,fp8,0,0.2568053404490153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,float16,0,0.277349332968394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,fp8,0,0.27929067611694336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,0.5060533285140991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,0.5117013454437256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,float16,0,0.2831733425458272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,0.4729866584142049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,0.5130399862925211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,fp8,0,0.2853066722551982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,64,128,1,fp8,fp8,0,0.26076799631118774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,0.5160693327585856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,0.475872000058492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,float16,0,0.28829334179560345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,fp8,0,0.2920479973157247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,0.521786650021871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,64,128,1,fp8,fp8,0,0.2670346697171529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,float16,0,0.16945600509643555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,0.5231946706771851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,0.48129598299662274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,0.29785066843032837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,fp8,0,0.17363200585047403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.27179733912150067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,64,128,1,fp8,fp8,0,0.16263467073440552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,0.30189865827560425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,0.27908267577489215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,float16,0,0.14592533310254416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,fp8,0,0.14693333705266318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,64,128,1,fp8,fp8,0,0.13576533397038779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.2728479901949565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,0.2544426719347636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,float16,0,0.14804266889890036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.27380265792210895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,fp8,0,0.14898133277893066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,64,128,1,fp8,fp8,0,0.13863466183344522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.27504533529281616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,0.25522132714589435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,float16,0,0.15010133385658264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.2768266598383586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,fp8,0,0.15253866712252298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,64,128,1,fp8,fp8,0,0.14361066619555155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.2783946593602498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,0.25990400711695355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.283242662747701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,float16,0,0.09549333651860555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,float16,0,0.1549013356367747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.28150399525960285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,fp8,0,0.15784533818562826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,64,128,1,fp8,fp8,0,0.14801067113876343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,0.2664480010668437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.17170133193333945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,fp8,0,0.09738133351008098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,64,128,1,fp8,fp8,0,0.09649067123730977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.1729066570599874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.16450666387875876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,float16,0,0.08691199620564778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.161871999502182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,fp8,0,0.08847467104593913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,64,128,1,fp8,fp8,0,0.0806879997253418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.16301866372426352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.1483466625213623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,float16,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.16262400150299072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,fp8,0,0.08681066830952962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,64,128,1,fp8,fp8,0,0.08242133259773254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,64,128,1,fp8,fp8,0,0.08283733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.1639840006828308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.149317334095637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,float16,0,0.08858666817347209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.1622986694176992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,fp8,0,0.08886933326721191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.16342932979265848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.1516586641470591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.16427733500798544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.11825066804885864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,fp8,0,0.08986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,64,128,1,fp8,fp8,0,0.08470933636029561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.16450666387875876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.1537493367989858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,float16,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,float16,0,0.06849066913127899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,fp8,0,0.06810666620731354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,64,128,1,fp8,fp8,0,0.06388266881306966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.11725333333015442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.1181173324584961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.109525332848231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.11707733074824016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,fp8,0,0.06809066732724507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,64,128,1,fp8,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.11731732885042827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.10913599530855815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,float16,0,0.06741333504517873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,float16,0,0.06805866460005443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.11743999520937602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,fp8,0,0.06772266825040181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,64,128,1,fp8,fp8,0,0.06431999802589417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.10969066619873047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.1156213382879893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,fp8,0,0.06771733363469441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,64,128,1,fp8,fp8,0,0.06392533580462138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.11730133493741353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.10930132865905762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.11748799681663513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,float16,0,0.06770133475462596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.11731200416882832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,fp8,0,0.06825066606203715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,64,128,1,fp8,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.10937600334485371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,float16,0,1.3820427258809407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,fp8,0,1.3859893480936687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,64,128,1,fp8,fp8,0,1.2306293646494548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,2.0597227414449057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,float16,0,1.407551924387614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,1.8591519991556804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,2.0632373491923013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,64,128,1,fp8,fp8,0,1.2507306734720867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,fp8,0,1.411695957183838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,2.0789546966552734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,float16,0,1.4293920199076335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,2.0796693166097007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,1.8813494046529133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,fp8,0,1.4250133832295735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,2.104128042856852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,64,128,1,fp8,fp8,0,1.2615679899851482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,1.8905760447184246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,float16,0,1.458682696024577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,2.094069321950277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,fp8,0,1.4610826174418132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,64,128,1,fp8,fp8,0,1.3108373483022053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,2.13154665629069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,float16,0,0.7890826861063639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,1.9368640581766765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,2.1397013664245605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,fp8,0,0.7983787059783936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,64,128,1,fp8,fp8,0,0.7365866502126058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,1.1387893358866374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,float16,0,0.6907626787821451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,1.1498613357543945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,1.0572693347930908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,fp8,0,0.6965599854787191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,64,128,1,fp8,fp8,0,0.6227946678797404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,1.0296533107757568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,float16,0,0.6995200316111246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,1.0379573504130046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,0.9419840176900228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,fp8,0,0.7041172981262207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,64,128,1,fp8,fp8,0,0.6314826806386312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,1.0424959659576416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,float16,0,0.7043733596801758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,1.045797348022461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,0.9505386352539062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,fp8,0,0.7105813026428223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,64,128,1,fp8,fp8,0,0.6391199827194214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,1.04749329884847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,1.0538400014241536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,float16,0,0.7226773103078207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,0.9595466454823812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,fp8,0,0.7297493616739908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,64,128,1,fp8,fp8,0,0.6598453521728516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,1.0665173530578613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,float16,0,0.40563201904296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,fp8,0,0.412773331006368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,0.5865066846211752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,64,128,1,fp8,fp8,0,0.3802880048751831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,1.0722133318583171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,0.9809866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,0.593946655591329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,float16,0,0.3556319872538249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,0.5456693172454834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,fp8,0,0.35887467861175537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,0.5326453447341919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,0.5327466726303101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,64,128,1,fp8,fp8,0,0.32449066638946533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,0.4896373351414998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,float16,0,0.3591146469116211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,0.5350240071614584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,fp8,0,0.3615093231201172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,64,128,1,fp8,fp8,0,0.32915733257929486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,0.5366506576538086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,0.4960213502248128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,float16,0,0.363322655359904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,0.5406933228174845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,fp8,0,0.3673173189163208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,64,128,1,fp8,fp8,0,0.332586665948232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,0.5442239840825399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,0.4981600046157837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,float16,0,0.3733333349227905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,fp8,0,0.37773334980010986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,0.5506453514099121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,64,128,1,fp8,fp8,0,0.34196265538533527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,float16,0,0.21462933222452799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,0.5539946556091309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,0.5074453353881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,0.3113759954770406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,fp8,0,0.2206559975941976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,64,128,1,fp8,fp8,0,0.2036799987157186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,0.3149120012919108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,0.29313600063323975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,float16,0,0.18685332934061685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.27897600332895917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,fp8,0,0.18761066595713297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,64,128,1,fp8,fp8,0,0.17471466461817423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.2808106740315755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,0.26242132981618244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,float16,0,0.18725866079330444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.2807679971059163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,fp8,0,0.19130667050679526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,64,128,1,fp8,fp8,0,0.1766080061594645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.28330665826797485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.2812959949175517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,0.2643413345019023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,float16,0,0.19065600633621216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,fp8,0,0.19411200284957886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,64,128,1,fp8,fp8,0,0.18027732769648233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.2868373394012451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,0.26629867156346637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,float16,0,0.19706666469573975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.29133333762486774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,float16,0,0.1200320025285085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,fp8,0,0.20105600357055664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,64,128,1,fp8,fp8,0,0.18553600708643594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.2940160036087036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,0.2732800046602885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.17078399658203125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,fp8,0,0.12338133653004964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,64,128,1,fp8,fp8,0,0.11687466502189636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.17352000872294107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.16546133160591125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,float16,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.1564853290716807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,fp8,0,0.10683199763298035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,fp8,0,0.10712533195813496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,64,128,1,fp8,fp8,0,0.09686400492986043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.15638933579126993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.14432533582051596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,float16,0,0.10563199718793233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.15620799859364828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,64,128,1,fp8,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.15761066476504007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.14408000310262045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,float16,0,0.10698666175206502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.1564959983030955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,fp8,0,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,64,128,1,fp8,fp8,0,0.09941866993904114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.15832533439000449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.14621333281199136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,float16,0,0.10839466253916423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.15812800327936807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,fp8,0,0.1111840009689331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,64,128,1,fp8,fp8,0,0.10289067029953003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,64,128,1,fp8,fp8,0,0.06825600067774455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.16063466668128967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.15225066741307577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,float16,0,0.07008000214894612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.10136533776919048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,fp8,0,0.07028799752394359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.1042133371035258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.09710400303204854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,float16,0,0.06592533489068349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.09708799918492635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,fp8,0,0.06599999964237213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,64,128,1,fp8,fp8,0,0.061648001273473106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.09741866588592529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.09057066837946574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.09850666920344035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,fp8,0,0.06624533236026764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,64,128,1,fp8,fp8,0,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.09841600060462952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.092357337474823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.0990559955437978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,64,128,1,fp8,fp8,0,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.099263995885849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.09278933207194011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,float16,0,0.06585599978764851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.09902933239936829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,fp8,0,0.0681333343187968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,64,128,1,fp8,fp8,0,0.06302399933338165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.09943466385205586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.09258133172988892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,float16,0,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,64,128,1,fp8,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,64,128,1,fp8,fp8,0,0.05003199974695841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.07430399954319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.07006399830182393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,float16,0,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.07445866862932841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.07495999832948048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,float16,0,0.054799998799959816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.0705386648575465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.0742986649274826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,fp8,0,0.053946668903032936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,64,128,1,fp8,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.07583466668923695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.07583466668923695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.07054933408896129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,float16,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.07634666562080383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,64,128,1,fp8,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,float16,0,0.05418133238951365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,64,128,1,fp8,fp8,0,0.05120000243186951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.07481599847475688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,float16,0,1.0227413177490234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,fp8,0,1.0317013263702393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,64,128,1,fp8,fp8,0,0.9216586748758951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,1.349455992380778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,1.3608266512552898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,float16,0,1.036186695098877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,1.2338133653004963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,fp8,0,1.0473066965738933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,64,128,1,fp8,fp8,0,0.9343252976735433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,1.3709227244059246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,1.3779840469360352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,float16,0,1.0484639803568523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,1.2447840372721355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,1.3820533752441406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,fp8,0,1.0565653642018635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,64,128,1,fp8,fp8,0,0.9444373448689779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,1.258016029993693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,float16,0,1.0698506832122803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,1.3875999450683594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,fp8,0,1.0806026458740234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,1.4008960723876953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,64,128,1,fp8,fp8,0,0.9706772963205973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,float16,0,0.5933546622594198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,1.4158132870992024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,64,128,1,fp8,fp8,0,0.5574453274408976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,1.2808586756388347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,0.7666986783345541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,fp8,0,0.6034613450368246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,0.7769760290781657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,fp8,0,0.5227733453114828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,0.7188426653544108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,float16,0,0.5186293522516886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,0.6885546843210856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,64,128,1,fp8,fp8,0,0.4705599943796794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,0.6933653354644775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,0.6926773389180502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,0.6298346519470215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,float16,0,0.5247626701990763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,fp8,0,0.5312426487604777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,64,128,1,fp8,fp8,0,0.4768960078557332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,0.7002986272176107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,fp8,0,0.5366826852162679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,0.6352533499399821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,float16,0,0.530842661857605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,0.7006133397420248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,64,128,1,fp8,fp8,0,0.4828266700108846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,0.7068479855855306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,0.642629345258077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,float16,0,0.541973352432251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,0.7132853666941324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,fp8,0,0.5490399996439616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,64,128,1,fp8,fp8,0,0.49569066365559894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,0.7206453482309977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,float16,0,0.3075466752052307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,0.6554293235143026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,0.39926934242248535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,fp8,0,0.3137066761652629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,64,128,1,fp8,fp8,0,0.2892586588859558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.3553066651026408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,0.4042346477508545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,0.37442131837209064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,float16,0,0.26818666855494183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,fp8,0,0.26983465751012164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,64,128,1,fp8,fp8,0,0.24779733022054037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,fp8,0,0.2726186712582906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,0.35837332407633465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,0.32980799674987793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,0.3598080078760783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,float16,0,0.2690666715304057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,0.3566240072250366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,64,128,1,fp8,fp8,0,0.2505653301874797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,64,128,1,fp8,fp8,0,0.25334399938583374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,0.3332800070444743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,float16,0,0.2749066750208537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,0.3639520009358724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,fp8,0,0.27692266305287677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,0.36555198828379315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,0.33825600147247314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,float16,0,0.28196267286936444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,0.34266666571299237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,0.3697119951248169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,fp8,0,0.2850400010744731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,64,128,1,fp8,fp8,0,0.2605973283449809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,0.37589867909749347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,float16,0,0.1629759967327118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.21333332856496176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.2026240030924479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,fp8,0,0.1682986617088318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,64,128,1,fp8,fp8,0,0.15820800264676413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.21704532702763876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,float16,0,0.13969066739082336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.18781334161758423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,fp8,0,0.14085867007573447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,64,128,1,fp8,fp8,0,0.1288640002409617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.1895093321800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.17521599928538004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,float16,0,0.13942933082580566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.18922666708628336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,fp8,0,0.14377066493034363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,64,128,1,fp8,fp8,0,0.1322719951470693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.19022399187088013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.17829867204030356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.18201599518458048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,float16,0,0.14173332850138345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.19202667474746704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,fp8,0,0.1495786706606547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,fp8,0,0.14390400052070618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,64,128,1,fp8,fp8,0,0.13597333431243896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.1944213310877482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,float16,0,0.14800533652305603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.19765865802764893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,64,128,1,fp8,fp8,0,0.141866664091746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.19930134216944376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.18712000052134195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,float16,0,0.08813333511352539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.11852799852689107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,fp8,0,0.09092799822489421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,64,128,1,fp8,fp8,0,0.08931733171145122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.12165333827336629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.11203199625015259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.11555199821790059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,float16,0,0.08043733239173889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.10955199599266052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,fp8,0,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,64,128,1,fp8,fp8,0,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.10121599833170573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,float16,0,0.07901866734027863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.10929066936175029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,fp8,0,0.0804746647675832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,64,128,1,fp8,fp8,0,0.0743146687746048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.11074666182200114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.1018933355808258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,float16,0,0.08042133351167043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.11086400349934895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,fp8,0,0.08242133259773254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,64,128,1,fp8,fp8,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.11183999975522359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.10288533568382263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,float16,0,0.08241066833337148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.11128532886505127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,fp8,0,0.08349333206812541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,64,128,1,fp8,fp8,0,0.07655466596285503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.11450666189193726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.10548800230026245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,float16,0,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,float16,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.07240533332029979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,fp8,0,0.05604266623655955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,64,128,1,fp8,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.0662773350874583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.07630399862925212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.07212266822655995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,64,128,1,fp8,fp8,0,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.07217066486676534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.06665599842866261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,float16,0,0.051669334371884666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.07123733560244243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,fp8,0,0.051125332713127136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,64,128,1,fp8,fp8,0,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.0726986676454544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.07096000015735626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,fp8,0,0.05166399975617727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,64,128,1,fp8,fp8,0,0.04819199939568838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.07241066793600719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.06805866460005443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,float16,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,64,128,1,fp8,fp8,0,0.04833066463470459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.06781333188215892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.051925331354141235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.052933335304260254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.0516480008761088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,64,128,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.04980266590913137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.048858667413393654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.05190933247407278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,64,128,1,fp8,fp8,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.05259199937184652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,float16,0,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.05237866441408793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,64,128,1,fp8,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.05247466762860616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,float16,0,0.04092800120512644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,64,128,1,fp8,fp8,0,0.039279999832312264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.04969066878159841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,float16,0,1.2210400104522705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,fp8,0,1.2211093107859294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,1.431162675221761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,64,128,1,fp8,fp8,0,1.1148853302001953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,1.429258664449056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,1.318069299062093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,float16,0,1.2298293113708496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,1.4419093132019043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,fp8,0,1.2283146381378174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,64,128,1,fp8,fp8,0,1.167413314183553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,1.3500213623046875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,1.4343892733256023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,float16,0,1.2370826403299968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,fp8,0,1.233855962753296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,1.4481706619262695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,64,128,1,fp8,fp8,0,1.188704013824463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,1.4475785891215007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,1.39411195119222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,float16,0,1.2572853565216064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,fp8,0,1.253168026606242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,1.4844053586324055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,64,128,1,fp8,fp8,0,1.2253866990407307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,float16,0,0.6727253595987955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,1.469055970509847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,0.7854080200195312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,fp8,0,0.6588906844456991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,1.430832068125407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,64,128,1,fp8,fp8,0,0.642735997835795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,0.7731466293334961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,float16,0,0.6198613246281942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,0.7496426900227865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,0.7273706595102946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,fp8,0,0.6195626656214396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,64,128,1,fp8,fp8,0,0.5657973289489746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,0.72652800877889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,0.6708693504333496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,float16,0,0.6227519909540812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,0.7324693202972412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,64,128,1,fp8,fp8,0,0.5789973338445028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,0.7303360303243002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,fp8,0,0.6216853459676107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,0.6804107030232748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,fp8,0,0.6260266701380411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,float16,0,0.6303199927012125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,0.7366186777750651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,64,128,1,fp8,fp8,0,0.5861706733703613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,0.7344533602396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,0.6884746551513672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,float16,0,0.6353280146916708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,fp8,0,0.6332266728083292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,0.7457813421885172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,64,128,1,fp8,fp8,0,0.6033813158671061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,0.4045226573944092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,float16,0,0.34518933296203613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,0.7425706386566162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,0.7088479995727539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,fp8,0,0.3399733304977417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,64,128,1,fp8,fp8,0,0.3322719931602478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,0.3752266565958659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,0.39773333072662354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,0.3869066635767619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,float16,0,0.31918400526046753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,fp8,0,0.3185653289159139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,64,128,1,fp8,fp8,0,0.2937120000521342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,0.3742719888687134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,0.3452693223953247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,float16,0,0.32064000765482586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,0.3749920129776001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,fp8,0,0.31833599011103314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,64,128,1,fp8,fp8,0,0.29742934306462604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,0.37481598059336346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,0.35252801577250165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,float16,0,0.32264000177383423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,0.3787999947865804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,fp8,0,0.32371199131011963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,64,128,1,fp8,fp8,0,0.302239994208018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,0.37827201684316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,0.3550399939219157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,float16,0,0.32686400413513184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,0.3848213354746501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,fp8,0,0.32571732997894287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,64,128,1,fp8,fp8,0,0.30938132603963214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,0.3832906484603882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,float16,0,0.1827146609624227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,0.36237867673238117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.20958934227625528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.2134986718495687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,fp8,0,0.17881600062052408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.19630932807922363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,64,128,1,fp8,fp8,0,0.17467200756072998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,0.2050079902013143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,float16,0,0.1688213348388672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,fp8,0,0.1666933298110962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,64,128,1,fp8,fp8,0,0.1554026703039805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.19844800233840942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.1965120037396749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.18410666783650717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,float16,0,0.16850133736928305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,fp8,0,0.1673226753870646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,64,128,1,fp8,fp8,0,0.15621333320935568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.1974453330039978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.18560532728830972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,float16,0,0.17052799463272095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.19896000623703003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,fp8,0,0.17075733343760172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,64,128,1,fp8,fp8,0,0.1605280041694641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.1993280053138733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.18889067570368448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,float16,0,0.17246933778127035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.19337066014607748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.20174932479858398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,fp8,0,0.17330666383107504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,64,128,1,fp8,fp8,0,0.10121599833170573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,64,128,1,fp8,fp8,0,0.16450666387875876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.20111999909083048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,float16,0,0.10199466347694397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.11953600247701009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,fp8,0,0.1002293328444163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.11620266238848369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.11658666531244914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,float16,0,0.09289600451787312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.1090826690196991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,fp8,0,0.0936853289604187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,64,128,1,fp8,fp8,0,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,64,128,1,fp8,fp8,0,0.08538132905960083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.10970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.1018506685892741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.101200004418691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,float16,0,0.09271466732025146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.1095199982325236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,fp8,0,0.09274133046468098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.11107732852300008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.10843732953071594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,float16,0,0.09431999921798706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.11032000184059143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,fp8,0,0.09425600369771321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,64,128,1,fp8,fp8,0,0.08656533559163411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.1106719970703125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.10274666547775269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,float16,0,0.09435199697812398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.11133866508801778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,fp8,0,0.09335466225941975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,64,128,1,fp8,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,64,128,1,fp8,fp8,0,0.09034666419029236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.10552000006039937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,float16,0,0.05607999861240387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.0680159976085027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,fp8,0,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,64,128,1,fp8,fp8,0,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.06204266846179962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.0652106652657191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.06634666522343953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,float16,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.06633600095907848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,fp8,0,0.05583466589450836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,float16,0,0.05555733541647593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,float16,0,0.05570666491985321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,fp8,0,0.05594133337338766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,64,128,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.06548800071080525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.0631039987007777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.06529599924882253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.065610667069753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.06215466558933258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.06604266663392384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,64,128,1,fp8,fp8,0,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,float16,0,0.05498133103052775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.04715733230113983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,fp8,0,0.05594133337338766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,64,128,1,fp8,fp8,0,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.06629866858323415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.061941335598627724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,float16,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,64,128,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,64,128,1,fp8,fp8,0,0.036874666810035706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.04458666841189066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,float16,0,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.04461866617202759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,fp8,0,0.03845866769552231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.04529066880544027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.04249600072701772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,float16,0,0.03885333240032196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.045269335309664406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,fp8,0,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,64,128,1,fp8,fp8,0,0.03700266778469086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.045834665497144066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,float16,0,0.0383840004603068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.04567466676235199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,64,128,1,fp8,fp8,0,0.03649600098530451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,float16,0,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.04466133316357931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.045050665736198425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,fp8,0,0.038848000268141426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,64,128,1,fp8,fp8,0,0.03699733316898346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,float16,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,64,128,1,fp8,fp8,0,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,float16,0,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.03254399945338567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,64,128,1,fp8,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.03317866722742716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,64,128,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.031082667410373688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,64,128,1,fp8,fp8,0,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,float16,0,1.1813546816507976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,1.1927519639333088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,fp8,0,1.1841866970062256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,64,128,1,fp8,fp8,0,1.0841279824574788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,1.196837345759074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,1.1050773461659749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,float16,0,1.1930987040201824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,1.2067093054453533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,fp8,0,1.1922240257263184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,64,128,1,fp8,fp8,0,1.145306666692098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,1.2033600012461345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,1.1527307033538818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,float16,0,1.2032533486684163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,1.2126293182373047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,fp8,0,1.195797363917033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,64,128,1,fp8,fp8,0,1.1739786465962727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,1.210752010345459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,1.1885653336842854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,float16,0,1.229909340540568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,1.2477280298868816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,fp8,0,1.2235199610392253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,64,128,1,fp8,fp8,0,1.1921546459197998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,float16,0,0.6539733409881592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,1.2275359630584717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,0.6619093418121338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,1.2119146982828777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,fp8,0,0.6394720077514648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,64,128,1,fp8,fp8,0,0.6285813252131144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,0.6523946523666382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,0.636954665184021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,float16,0,0.601797342300415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,0.6076906522115072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,fp8,0,0.5992213487625122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,64,128,1,fp8,fp8,0,0.5502293507258097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,0.6058133443196615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,0.5588586727778116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,float16,0,0.6049973169962565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,0.6117546558380127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,fp8,0,0.6041280031204224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,64,128,1,fp8,fp8,0,0.5646239916483561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,0.6077280044555664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,0.572927991549174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,float16,0,0.6082773208618164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,0.6149813334147135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,fp8,0,0.607802669207255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,64,128,1,fp8,fp8,0,0.5701173146565756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,0.6140693426132202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,float16,0,0.6185280084609985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,0.5763306617736816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,0.6243679920832316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,fp8,0,0.6155733267466227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,64,128,1,fp8,fp8,0,0.5906986792882284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,float16,0,0.3364693323771159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,0.6221173206965128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,0.6021653413772583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,0.3407946825027466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,fp8,0,0.3303626577059428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,64,128,1,fp8,fp8,0,0.32438933849334717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,0.335477352142334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,0.3287893335024516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,float16,0,0.31065599123636883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.31357866525650024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,fp8,0,0.309717337290446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,64,128,1,fp8,fp8,0,0.2857866684595744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.312394658724467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.28969067335128784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,float16,0,0.3111306627591451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.3136319915453593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,fp8,0,0.31062932809193927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,64,128,1,fp8,fp8,0,0.2888266642888387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.3123199939727783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.29338665803273517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,float16,0,0.3142613371213277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.3176959951718648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,fp8,0,0.31249600648880005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,64,128,1,fp8,fp8,0,0.29314666986465454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.3166240056355794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,0.29766400655110675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,float16,0,0.31726400057474774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,0.3222506642341614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,fp8,0,0.3176906704902649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,64,128,1,fp8,fp8,0,0.3019040028254191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,0.32043200731277466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,0.3057546615600586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,float16,0,0.17615467309951782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.1800160010655721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,fp8,0,0.17462400595347086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,64,128,1,fp8,fp8,0,0.17254400253295898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.17805866400400797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.1748639941215515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,float16,0,0.16167466839154562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.16395200292269388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,fp8,0,0.16200000047683716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,64,128,1,fp8,fp8,0,0.15104533235232034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.16446933150291443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.15203199783960977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,float16,0,0.1627840002377828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.1646933356920878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.16633599996566772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,fp8,0,0.16332800189654031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,64,128,1,fp8,fp8,0,0.15338666240374246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.16455466548601785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.15611732999483743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,float16,0,0.16514133413632712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,fp8,0,0.16568000117937723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,64,128,1,fp8,fp8,0,0.15399466951688132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.1655733287334442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.1585973302523295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,float16,0,0.16804265975952148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.16873067617416382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,fp8,0,0.1672160029411316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,64,128,1,fp8,fp8,0,0.16101866960525513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.16936000188191733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.16194132963816324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,float16,0,0.09853866696357727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.10008000334103902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,fp8,0,0.09682666261990865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,64,128,1,fp8,fp8,0,0.09873066345850627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,64,128,1,fp8,fp8,0,0.08239466448624928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.09891200065612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.08475200335184734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.09920533498128255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,float16,0,0.09158399701118469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,fp8,0,0.0904853343963623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.09125866492589314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,fp8,0,0.09220799803733826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.090938667456309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,float16,0,0.09084266424179077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.09113599856694539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,64,128,1,fp8,fp8,0,0.08343999584515889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,64,128,1,fp8,fp8,0,0.08483200271924336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.09178133805592854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,float16,0,0.09223999579747517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.09326400359471639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.0929813285668691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,fp8,0,0.09060266613960266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.09197333455085754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.08634133140246074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,float16,0,0.09326400359471639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,64,128,1,fp8,fp8,0,0.08749333024024963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.09269332885742188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.08913600444793701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,float16,0,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,float16,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.056090667843818665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,fp8,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,64,128,1,fp8,fp8,0,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.055573334296544395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.05576533575852712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,fp8,0,0.054671997825304665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,64,128,1,fp8,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,float16,0,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.054842665791511536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,64,128,1,fp8,fp8,0,0.05136533578236898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,64,128,1,fp8,fp8,0,0.05087466537952423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.05449066559473673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,float16,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,64,128,1,fp8,fp8,0,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,float16,0,0.038586666186650596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,64,128,1,fp8,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,float16,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,64,128,1,fp8,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.038160001238187156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,64,128,1,fp8,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.0384853333234787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,float16,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.038533332447210945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.038719999293486275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,64,128,1,fp8,fp8,0,0.0360000009338061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,64,128,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.0390133336186409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,float16,0,0.03842133283615112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,fp8,0,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,64,128,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,float16,0,0.02586666742960612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,64,128,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,float16,0,0.026133333643277485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,64,128,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.02601066728432973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,64,128,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,64,128,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,64,128,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,64,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.021888000269730885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,0.548906683921814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,float16,0,0.5555040041605631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,fp8,0,0.5534293254216512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,64,128,1,fp8,fp8,0,0.5046773354212443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,0.5450719992319742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,0.49375466505686444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,0.5527413288752238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,float16,0,0.5613066752751669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,fp8,0,0.5592480103174845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,64,128,1,fp8,fp8,0,0.523253321647644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,0.5502666632334391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,0.5137333472569784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,float16,0,0.5645173390706381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,0.5554133256276449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,fp8,0,0.5642133156458536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,64,128,1,fp8,fp8,0,0.5247626701990763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,0.5520533323287964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,0.5172906716664633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,float16,0,0.5747466484705607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,0.5649813413619995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,fp8,0,0.570575992266337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,float16,0,0.31360000371932983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,64,128,1,fp8,fp8,0,0.553056001663208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,0.5592533349990845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,fp8,0,0.306768000125885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,0.3076000014940898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,0.5393706560134888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,64,128,1,fp8,fp8,0,0.3035573363304138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,0.3011626601219177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,0.29950400193532306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,float16,0,0.286298672358195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.25512532393137616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.2811253269513448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,fp8,0,0.28487465778986615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,64,128,1,fp8,fp8,0,0.25960532824198407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.2789386709531148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,float16,0,0.2871039907137553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.2818560004234314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,fp8,0,0.28601600726445514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,64,128,1,fp8,fp8,0,0.26772799094518024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.28142400582631427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.2626826763153076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,float16,0,0.28758400678634644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.2844533324241638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,fp8,0,0.2885439991950989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,64,128,1,fp8,fp8,0,0.27186665932337445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.2829493284225464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,float16,0,0.2953760027885437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,0.2895359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,fp8,0,0.2934453288714091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,64,128,1,fp8,fp8,0,0.2798186739285787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,0.28787734111150104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.2746666669845581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,float16,0,0.1651520033677419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.16289066274960837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,fp8,0,0.1638826628526052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.14910399913787842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,64,128,1,fp8,fp8,0,0.16260799765586853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.16074666380882263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.15987199544906616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,float16,0,0.1520799994468689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,float16,0,0.15278933445612589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,fp8,0,0.15217600266138712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,64,128,1,fp8,fp8,0,0.1404159963130951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.14798933267593384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.1371946632862091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,fp8,0,0.15263467033704123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,64,128,1,fp8,fp8,0,0.14257599910100302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.14867732922236124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.1400373379389445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,float16,0,0.15413332978884378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.15065600474675497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,fp8,0,0.153029332558314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,64,128,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.15027733643849692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.15382933616638184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.14114666978518167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,float16,0,0.15616533160209656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.15268799662590027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,fp8,0,0.15521066387494406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,64,128,1,fp8,fp8,0,0.1492639978726705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.14662399888038635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,float16,0,0.09450133641560872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.09256533781687419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.09100799759229024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,fp8,0,0.09303999940554301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,64,128,1,fp8,fp8,0,0.09332266449928284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.09034666419029236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,float16,0,0.0867199997107188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.0844693382581075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,fp8,0,0.08709333340326945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,64,128,1,fp8,fp8,0,0.0784800002972285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.0848533312479655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.07644799848397572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,float16,0,0.0867199997107188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.08505066235860188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,float16,0,0.08689066767692566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.08547199765841167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,fp8,0,0.08640533685684204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,64,128,1,fp8,fp8,0,0.0792906681696574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.08336533109347026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.07854933540026347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,fp8,0,0.0862666666507721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,64,128,1,fp8,fp8,0,0.08039466540018718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.08474133412043254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,float16,0,0.08826667070388794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.08548266688982646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,fp8,0,0.08717333277066548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,64,128,1,fp8,fp8,0,0.08172266681989034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.08100266754627228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,float16,0,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,64,128,1,fp8,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.04990399877230326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,float16,0,0.04986133178075155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,float16,0,0.051141331593195595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.04833066463470459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.048997332652409874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,fp8,0,0.05101333558559418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,64,128,1,fp8,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.04567466676235199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,fp8,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,64,128,1,fp8,fp8,0,0.04698666433493296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.049098665515581764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.045514668027559914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,float16,0,0.051258668303489685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,float16,0,0.05141866703828176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,fp8,0,0.0510506679614385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,64,128,1,fp8,fp8,0,0.046384001771608986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,fp8,0,0.05179200073083242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,64,128,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.04958933095137278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.04752000172932943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,fp8,0,0.036789332826932274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,64,128,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.03716800113519033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,float16,0,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.03393599887688955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,fp8,0,0.03600533306598663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,64,128,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.03479466587305069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,float16,0,0.03607466568549474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,64,128,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.035162667433420815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.0329120010137558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,float16,0,0.03629866739114126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,fp8,0,0.03699733316898346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,64,128,1,fp8,fp8,0,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.032458665470282234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,float16,0,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,fp8,0,0.035760000348091125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,64,128,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,fp8,0,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,64,128,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,64,128,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,float16,0,0.02388266722361247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,64,128,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,64,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,64,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,float16,0,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,fp8,0,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,float16,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,fp8,0,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,float16,0,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,64,128,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,float16,0,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,float16,0,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,float16,0,0.3008906642595927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.3022879958152771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.3014026681582133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,fp8,0,0.29950400193532306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,64,128,1,fp8,fp8,0,0.2796106735865275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.2807360092798869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,float16,0,0.3036266764005025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.30145599444707233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,fp8,0,0.30185067653656006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,64,128,1,fp8,fp8,0,0.29047467311223346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.3004693388938904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.28806400299072266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,64,128,1,fp8,fp8,0,0.29337600866953534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,float16,0,0.30536532402038574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.305951992670695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,fp8,0,0.3046506643295288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.3051360050837199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.29306666056315106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,float16,0,0.3084266583124797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,0.3084106643994649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,fp8,0,0.3062826593716939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,64,128,1,fp8,fp8,0,0.3027946750322978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.1701386570930481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,0.3056480089823405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,float16,0,0.170799990495046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.30294400453567505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,fp8,0,0.16873067617416382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,float16,0,0.1567359964052836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,64,128,1,fp8,fp8,0,0.1686613361040751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.16875199476877847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,64,128,1,fp8,fp8,0,0.14772799611091614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.16978132724761963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.1581013302008311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,float16,0,0.1572533349196116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,fp8,0,0.15684266885121664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.15718400478363037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.14800533652305603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.15819199879964194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,fp8,0,0.15837867061297098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.15901866555213928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,fp8,0,0.15913599729537964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,64,128,1,fp8,fp8,0,0.1502133309841156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.15847466389338175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.15425599614779154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.15733866890271506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.16193067034085593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.15030933419863382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,float16,0,0.15920533736546835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,64,128,1,fp8,fp8,0,0.15409599741299948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,float16,0,0.16220800081888834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,fp8,0,0.16090666254361471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,64,128,1,fp8,fp8,0,0.1588586668173472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.15998400251070657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.15846400459607443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,float16,0,0.09470933675765991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.08715732892354329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,fp8,0,0.09333333373069763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,64,128,1,fp8,fp8,0,0.09637332955996196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.09552533427874248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,float16,0,0.0881813367207845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.08828266461690266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,fp8,0,0.08654933174451192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,64,128,1,fp8,fp8,0,0.08170133332411449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.08701866865158081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.0814933329820633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,float16,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,fp8,0,0.08763200044631958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,64,128,1,fp8,fp8,0,0.08249600231647491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.08780800302823384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.08919999996821086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.08258666594823201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,float16,0,0.0885759989420573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.08896000186602275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,fp8,0,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,64,128,1,fp8,fp8,0,0.08245866497357686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.08267199993133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,float16,0,0.08897067109743755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.0885653297106425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,fp8,0,0.08877333005269368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,64,128,1,fp8,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.08898666501045227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.08519466718037923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,float16,0,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.05406400064627329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.05166399975617727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,fp8,0,0.05261866748332977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,64,128,1,fp8,fp8,0,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.05372266471385956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,float16,0,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,fp8,0,0.052416001756985985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,64,128,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,64,128,1,fp8,fp8,0,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,float16,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,fp8,0,0.05157333115736643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.051589335004488625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,float16,0,0.05179200073083242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.052704001466433205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,64,128,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.05248000224431356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.04970666766166687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,float16,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,64,128,1,fp8,fp8,0,0.04952000081539154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,64,128,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.05239999790986379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,float16,0,0.03419733295838038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,64,128,1,fp8,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,float16,0,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.03393599887688955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,float16,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,64,128,1,fp8,fp8,0,0.03180799881617228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.03417066733042399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,float16,0,0.034671999514102936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,fp8,0,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,64,128,1,fp8,fp8,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,64,128,1,fp8,fp8,0,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.03200533241033554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,float16,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.03427733232577642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.034341332813103996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,64,128,1,fp8,fp8,0,0.024933333198229473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,float16,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,fp8,0,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,64,128,1,fp8,fp8,0,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,64,128,1,fp8,fp8,0,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,float16,0,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,64,128,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,float16,0,0.2151093284289042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.21525333325068155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,fp8,0,0.21385065714518228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,64,128,1,fp8,fp8,0,0.19919466972351074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.2159199913342794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.21418132384618124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.19907732804616293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,float16,0,0.21570134162902832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,fp8,0,0.21447465817133585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,64,128,1,fp8,fp8,0,0.2029119928677877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.21659199396769205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.2148853341738383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.20217067003250122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,float16,0,0.21749866008758545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,fp8,0,0.2171786626180013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,64,128,1,fp8,fp8,0,0.20534400145212808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.21579732497533163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.2059040069580078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,float16,0,0.2182719906171163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.21815999348958334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,float16,0,0.12142933408419292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.22055466969807944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,fp8,0,0.21724800268809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,64,128,1,fp8,fp8,0,0.21076265970865884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.21107733249664307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.12172800302505493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,fp8,0,0.12110400199890137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,64,128,1,fp8,fp8,0,0.12072533369064331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.12149866422017415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.12113599975903828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,float16,0,0.1150986651579539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.11517332990964253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,fp8,0,0.1153546671072642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,64,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.11719466249148051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.10698666175206502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,float16,0,0.11569600303967793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.10699199636777242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.1147093375523885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,fp8,0,0.11543466647466023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,64,128,1,fp8,fp8,0,0.10665599505106609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.114138662815094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,float16,0,0.11614400148391724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.11548266808191936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,fp8,0,0.11557333668073018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,64,128,1,fp8,fp8,0,0.10707199573516846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.11489599943161011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,float16,0,0.11522666613260905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.11622933546702068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,fp8,0,0.1162506639957428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.06636266907056172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,64,128,1,fp8,fp8,0,0.11045333743095398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.11615999539693196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.1113920013109843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.06611200173695882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,float16,0,0.06814933319886525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,fp8,0,0.06675200164318085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,64,128,1,fp8,fp8,0,0.06614399949709575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,64,128,1,fp8,fp8,0,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.0670666644970576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,float16,0,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.06599999964237213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.06593066453933716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,fp8,0,0.06606400012969971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.06615466872851054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.061941335598627724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.060549333691596985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,float16,0,0.06607466439406078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,64,128,1,fp8,fp8,0,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.06637866795063019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.06499200065930684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,float16,0,0.06614933411280315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.06443200012048085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,fp8,0,0.06431999802589417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,64,128,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.06204266846179962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,float16,0,0.06595733265082042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.06610666712125142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.04248533149560293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,fp8,0,0.06526400148868561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,64,128,1,fp8,fp8,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.06500266492366791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,64,128,1,fp8,fp8,0,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,float16,0,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,float16,0,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,fp8,0,0.04040000090996424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,64,128,1,fp8,fp8,0,0.03862400104602178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.04200000067551931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.040864000717798867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,64,128,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.041050667564074196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,float16,0,0.040709334115187325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.0413973331451416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,64,128,1,fp8,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,64,128,1,fp8,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.042378668983777366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,fp8,0,0.02790933350721995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,64,128,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,64,128,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,float16,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,64,128,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,fp8,0,0.026709333062171936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,64,128,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,float16,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,fp8,0,0.020261333634455998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,64,128,1,fp8,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,64,128,1,fp8,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,64,128,1,fp8,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,float16,0,0.17283199230829874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.1734559933344523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,fp8,0,0.17388266324996948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,64,128,1,fp8,fp8,0,0.15849066774050394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.17327467600504556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.15821333726247153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,float16,0,0.17410133282343546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.17246399323145548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.17271999518076578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,fp8,0,0.17321600516637167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,64,128,1,fp8,fp8,0,0.15918933351834616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.1607306698958079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,float16,0,0.17420266071955362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.17307732502619425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,fp8,0,0.17293866475423178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,64,128,1,fp8,fp8,0,0.15915733575820923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.17417599757512411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.15927466750144958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,64,128,1,fp8,fp8,0,0.1622933348019918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,float16,0,0.17535465955734253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.17461333672205606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.0942133367061615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,fp8,0,0.17267733812332153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,64,128,1,fp8,fp8,0,0.09035199880599976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.17262399196624756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.16338666280110678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,float16,0,0.09468799829483032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,fp8,0,0.09325333436330159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.09473066528638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.09021866321563721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,float16,0,0.09296533465385437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.09269332885742188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,fp8,0,0.093231995900472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,64,128,1,fp8,fp8,0,0.08683199683825175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.092549333969752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.08642133076985677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,float16,0,0.09297600388526917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.09302399555842082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,64,128,1,fp8,fp8,0,0.08781333764394124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.09274666508038838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,float16,0,0.09267200032869975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.09301867087682088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,fp8,0,0.0923520028591156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,64,128,1,fp8,fp8,0,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.09334400296211243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.08736000458399455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,float16,0,0.09275199969609578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.09364266196886699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,fp8,0,0.09275199969609578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,fp8,0,0.05594133337338766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,64,128,1,fp8,fp8,0,0.08805333574612935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.09291199843088786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.08750399947166443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,float16,0,0.0565280020236969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.055813332398732506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.05686399837334951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,64,128,1,fp8,fp8,0,0.0533493310213089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.05606933434804281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.055173332492510475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,float16,0,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,float16,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,64,128,1,fp8,fp8,0,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.05175999800364176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.052517334620157875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,64,128,1,fp8,fp8,0,0.052517334620157875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.05573866764704386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,float16,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,float16,0,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.05442133545875549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,fp8,0,0.055104002356529236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,64,128,1,fp8,fp8,0,0.052069331208864846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.05557866891225179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.053029333551724754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.05548266569773356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,64,128,1,fp8,fp8,0,0.05266133447488149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,float16,0,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,64,128,1,fp8,fp8,0,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,float16,0,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,64,128,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,64,128,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,float16,0,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,64,128,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,64,128,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,float16,0,0.03436266630887985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.03398400048414866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,fp8,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,fp8,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,64,128,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,64,128,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,64,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,64,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,float16,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,64,128,1,fp8,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,float16,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,64,128,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,64,128,1,fp8,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,float16,0,0.14990933736165366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.15012799700101218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,float16,0,0.1502346694469452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,fp8,0,0.15002133448918661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,64,128,1,fp8,fp8,0,0.1399893363316854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.14872533082962036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.13913066188494363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.14865066607793173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,fp8,0,0.1502133309841156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,64,128,1,fp8,fp8,0,0.1400373379389445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.1497706671555837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,64,128,1,fp8,fp8,0,0.13987200458844504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.14010133345921835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,float16,0,0.15008533000946045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.14959999918937683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,fp8,0,0.1490506629149119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.15030399958292642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.13985066612561545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,float16,0,0.14990933736165366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.15038933356602988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,fp8,0,0.15005333224932352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,64,128,1,fp8,fp8,0,0.1418880025545756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.14940800269444784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.141077329715093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.08462933699289958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,float16,0,0.0828906645377477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.08283733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,fp8,0,0.08450667063395183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,fp8,0,0.08216000099976857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,64,128,1,fp8,fp8,0,0.07894933223724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.08267733454704285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,float16,0,0.08277866741021474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,float16,0,0.08286933104197185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,64,128,1,fp8,fp8,0,0.07735999921957652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.07798933486143748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.08247999846935272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,fp8,0,0.08262933293978374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,64,128,1,fp8,fp8,0,0.07836266855398814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.08309333523114522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.07838933169841766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,float16,0,0.08243200182914734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.0823520024617513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,fp8,0,0.08277866741021474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,64,128,1,fp8,fp8,0,0.07799999912579854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.07901333272457123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,float16,0,0.08246933420499165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.08244266609350841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,fp8,0,0.08435733119646709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.04919999837875366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,64,128,1,fp8,fp8,0,0.07850133379300435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.0828000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.0780213326215744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,float16,0,0.04799466828505198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,float16,0,0.04859200119972229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,64,128,1,fp8,fp8,0,0.04791999856630961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.046997333566347756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,64,128,1,fp8,fp8,0,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,float16,0,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,fp8,0,0.048986668388048805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,float16,0,0.049322664737701416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.04776533444722494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,64,128,1,fp8,fp8,0,0.04727466901143392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,64,128,1,fp8,fp8,0,0.04731200138727824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.04905066887537638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,float16,0,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,fp8,0,0.04921066761016846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,64,128,1,fp8,fp8,0,0.030026666820049286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,64,128,1,fp8,fp8,0,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,float16,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,64,128,1,fp8,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,fp8,0,0.031130666534105938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,64,128,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.03012266755104065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,64,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,64,128,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,float16,0,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,64,128,1,fp8,fp8,0,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,float16,0,0.02279466638962428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,float16,0,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,fp8,0,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,64,128,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,64,128,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,float16,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,64,128,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,64,128,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,64,128,1,fp8,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,float16,0,0.9345173041025797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,fp8,0,0.9419946670532227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,64,128,1,fp8,fp8,0,0.868885358174642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,float16,0,0.9488480091094971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,fp8,0,0.9589920043945312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,64,128,1,fp8,fp8,0,0.8884692986806234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,float16,0,0.967631975809733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,fp8,0,0.9767626921335856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,5.770528157552083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,5.355594635009766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,5.79141362508138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,5.788570404052734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,64,128,1,fp8,fp8,0,0.9101706345876058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,5.807882944742839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,5.367818832397461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,float16,0,0.9955946604410807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,fp8,0,1.009541352589925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,5.828762690226237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,64,128,1,fp8,fp8,0,0.9474720160166422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,float16,0,0.5517333348592123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,fp8,0,0.5645013252894083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,64,128,1,fp8,fp8,0,0.5331840117772421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,5.395861307779948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,5.817205429077148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,3.050640106201172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,float16,0,0.4926240046819051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,5.848005294799805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,fp8,0,0.4972533384958903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,3.0587679545084634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,64,128,1,fp8,fp8,0,0.4618826707204183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,5.4236799875895185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,5.870255788167317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,2.830597241719564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,float16,0,0.49675734837849933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,fp8,0,0.5021706819534302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,2.9650185902913413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,64,128,1,fp8,fp8,0,0.4672960042953491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,float16,0,0.5037813186645508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,2.9730399449666343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,fp8,0,0.5113386710484823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,2.758432070414225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,64,128,1,fp8,fp8,0,0.4758293231328328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,2.9821014404296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,float16,0,0.5186026493708292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,2.983872095743815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,2.7597920099894204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,fp8,0,0.5273386637369791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,2.980799992879232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,64,128,1,fp8,fp8,0,0.49409600098927814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,float16,0,0.3057600061098735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,fp8,0,0.3141706585884094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,2.7689441045125327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,2.9894240697224936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,64,128,1,fp8,fp8,0,0.29975465933481854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,3.0033651987711587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,1.6085119247436523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,float16,0,0.2750613292058309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,2.7869494756062827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,3.017002741495768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,fp8,0,0.27739200989405316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,64,128,1,fp8,fp8,0,0.26320000489552814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,1.623802661895752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,1.499951998392741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,1.574512004852295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,float16,0,0.2789600094159444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,fp8,0,0.2813813289006551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,64,128,1,fp8,fp8,0,0.2654079993565877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,1.577445348103841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,1.46452792485555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,float16,0,0.28484266996383667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,1.576207955678304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,fp8,0,0.28756266832351685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,64,128,1,fp8,fp8,0,0.27286932865778607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,1.5820266405741374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,1.4655520121256511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,float16,0,0.29130132993062335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,1.5835572878519695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,fp8,0,0.29717334111531574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,64,128,1,fp8,fp8,0,0.2800266742706299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,1.5892213185628254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,1.472442626953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,float16,0,0.20794665813446045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,fp8,0,0.2060799996058146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,1.5945653915405273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,64,128,1,fp8,fp8,0,0.19521600008010864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,0.9174133141835531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,float16,0,0.20211732387542725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,1.4820693333943684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,1.5985013643900554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,fp8,0,0.20226667324701944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,64,128,1,fp8,fp8,0,0.19428267081578574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,0.9172213077545166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,0.851797342300415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,0.9140480359395345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,float16,0,0.2034613291422526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,0.9107093016306559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,fp8,0,0.20394132534662882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,0.8497920036315918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,64,128,1,fp8,fp8,0,0.19318399826685587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,0.9141066869099935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,float16,0,0.2055520017941793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,0.9144372940063477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,fp8,0,0.2034613291422526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,0.8512213230133057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,64,128,1,fp8,fp8,0,0.19720532496770224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,0.9134079615275065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,0.9160160223642985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,float16,0,0.20737600326538086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,64,128,1,fp8,fp8,0,0.19708265860875449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,fp8,0,0.20528000593185425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,0.8501653671264648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,0.9177227020263672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,0.9194986820220947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,float16,0,0.6977866490681967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,0.8491786321004232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,fp8,0,0.7051839828491211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,64,128,1,fp8,fp8,0,0.649781346321106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,float16,0,0.7064212958017985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,64,128,1,fp8,fp8,0,0.6628959973653158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,fp8,0,0.7133173147837321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,float16,0,0.7200427055358887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,3.4219627380371094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,3.1714401245117188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,3.4313014348347983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,3.424410820007324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,fp8,0,0.72652800877889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,64,128,1,fp8,fp8,0,0.6789013544718424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,3.430570602416992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,3.1771628061930337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,float16,0,0.7413120269775391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,fp8,0,0.7502559820810953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,3.43995730082194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,64,128,1,fp8,fp8,0,0.7081066767374674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,float16,0,0.4161440134048462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,fp8,0,0.4249866803487142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,3.2025651931762695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,64,128,1,fp8,fp8,0,0.40219732125600177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,3.4566453297932944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,1.8261760075887044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,3.472410519917806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,float16,0,0.3710506757100423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,fp8,0,0.37378132343292236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,3.2227147420247397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,1.834671974182129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,3.4787521362304688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,64,128,1,fp8,fp8,0,0.35095465183258057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,1.7063946723937988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,1.7774772644042969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,float16,0,0.3737599849700928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,fp8,0,0.3793226480484009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,64,128,1,fp8,fp8,0,0.3550186554590861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,1.7794559796651204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,1.648911952972412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,float16,0,0.381877342859904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,1.776026725769043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,fp8,0,0.38708798090616864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,64,128,1,fp8,fp8,0,0.36210131645202637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,1.7856586774190266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,1.6556480725606282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,float16,0,0.3914933204650879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,1.7872799237569172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,fp8,0,0.3987413247426351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,1.6608853340148926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,64,128,1,fp8,fp8,0,0.37444265683492023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,1.7891039848327637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,float16,0,0.2363626758257548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,fp8,0,0.24146666129430136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,1.8016106287638347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,64,128,1,fp8,fp8,0,0.2314293384552002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,0.9826933542887369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,1.8044959704081218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,1.6728213628133137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,float16,0,0.20962133010228476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,0.9926400184631348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,fp8,0,0.21280533075332642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,0.9222453435262045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,64,128,1,fp8,fp8,0,0.2035306692123413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,0.9518826802571615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,float16,0,0.21348265806833902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,fp8,0,0.21543999512990317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,0.9568373362223307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,64,128,1,fp8,fp8,0,0.20568533738454184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,0.8911466598510742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,0.953333298365275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,float16,0,0.21767999728520712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,fp8,0,0.2202613353729248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,0.954960028330485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,64,128,1,fp8,fp8,0,0.20946667591730753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,0.8938720226287842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,0.9613920052846273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,float16,0,0.22367467482884726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,fp8,0,0.22778133551279703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,0.962394634882609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,64,128,1,fp8,fp8,0,0.21655466159184775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,0.8994026978810629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,float16,0,0.16263467073440552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,0.9682400226593018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,fp8,0,0.1623146633307139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,64,128,1,fp8,fp8,0,0.15426133076349893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,0.9719146887461344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,float16,0,0.16024000446001688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,0.5809866587320963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,0.9061493078867594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,0.577888011932373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,0.5356746514638265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,fp8,0,0.16032000382741293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,0.5714613199234009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,64,128,1,fp8,fp8,0,0.15229333440462747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,0.5718186696370443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,float16,0,0.16053332885106406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,0.5356853405634562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,fp8,0,0.16032532850901285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,64,128,1,fp8,fp8,0,0.15210666259129843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,0.5741653442382812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,0.5736106634140015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,float16,0,0.16056533654530844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,0.5367146730422974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,fp8,0,0.16237866878509521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,64,128,1,fp8,fp8,0,0.15213867028554282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,0.5752373139063517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,0.5754186709721884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,float16,0,0.16241066654523215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,0.5348693529764811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,0.5747893253962199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,0.574351986249288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,fp8,0,0.1622933348019918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,float16,0,0.5834240118662516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,64,128,1,fp8,fp8,0,0.15436800320943198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,fp8,0,0.5883146524429321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,0.5376053253809611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,64,128,1,fp8,fp8,0,0.5414986610412598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,float16,0,0.5903093417485555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,fp8,0,0.5953226486841837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,64,128,1,fp8,fp8,0,0.552069346110026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,2.4683574040730796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,float16,0,0.5994506676991781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,2.4713706970214844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,2.2881867090861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,fp8,0,0.6064053376515707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,2.4764533042907715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,64,128,1,fp8,fp8,0,0.5640106598536173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,2.4795573552449546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,2.3023573557535806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,float16,0,0.6169919967651367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,2.491088072458903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,fp8,0,0.6258080005645752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,64,128,1,fp8,fp8,0,0.5861866474151611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,float16,0,0.35123201211293537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,fp8,0,0.3585546811421712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,2.492197354634603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,2.3116639455159507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,64,128,1,fp8,fp8,0,0.3407626549402873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,2.5129599571228027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,1.3381226857503254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,float16,0,0.311791996161143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,2.523354689280192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,2.3323893547058105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,fp8,0,0.3160266677538554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,64,128,1,fp8,fp8,0,0.2972266674041748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,1.3428692817687988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,1.2450346946716309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,1.2932106653849285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,float16,0,0.31612799564997357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,fp8,0,0.31858134269714355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,64,128,1,fp8,fp8,0,0.301258663336436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,1.2946720123291016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,1.200933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,1.2961920102437336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,float16,0,0.3210879961649577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,fp8,0,0.3264639973640442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,64,128,1,fp8,fp8,0,0.3060266574223836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,1.2980000178019206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,1.2064800262451172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,1.2998826503753662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,float16,0,0.3312106728553772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,fp8,0,0.33638934294382733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,1.3078773021697998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,64,128,1,fp8,fp8,0,0.31613866488138836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,1.2101013660430908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,float16,0,0.19913599888483682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,1.3106239636739094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,fp8,0,0.20323733488718668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,64,128,1,fp8,fp8,0,0.19508800903956094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,0.72762664159139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,1.3160106341044109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,float16,0,0.17290133237838745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,1.222373326619466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,0.733733336130778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,0.6804426511128744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,fp8,0,0.17492266496022543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,float16,0,0.17616534233093262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,64,128,1,fp8,fp8,0,0.16841065883636475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,0.6998613675435384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,0.702677329381307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,64,128,1,fp8,fp8,0,0.17151999473571777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,0.7040747006734213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,0.6554239988327026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,fp8,0,0.17859200636545816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,0.6561439832051595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,float16,0,0.18083733320236206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,0.7050506273905436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,fp8,0,0.18276800711949667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,64,128,1,fp8,fp8,0,0.17702933152516684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,0.7112212975819906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,0.7070293426513672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,float16,0,0.18685867389043173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,0.6613759994506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,fp8,0,0.19021866718928018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,64,128,1,fp8,fp8,0,0.1830986738204956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,0.7142773469289144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,float16,0,0.13779733578364053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,0.7195200125376383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,0.6699093182881674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,0.43596800168355304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,fp8,0,0.1390666663646698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,0.43537600835164386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,64,128,1,fp8,fp8,0,0.13145599762598673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,float16,0,0.13590400417645773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,0.40430935223897296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,fp8,0,0.13595733046531677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,0.4326133330663045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,64,128,1,fp8,fp8,0,0.12956800063451132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,0.4332266648610433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,float16,0,0.13665599624315897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,0.40254934628804523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,fp8,0,0.13478400309880575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,0.4338560104370117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,64,128,1,fp8,fp8,0,0.1295413374900818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,0.43225598335266113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,float16,0,0.13594667116800943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,0.4039200146993001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,fp8,0,0.13572800159454346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,0.43558398882548016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,64,128,1,fp8,fp8,0,0.12756266196568808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,0.4336426655451457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,float16,0,0.1349493364493052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,0.4044373432795207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,fp8,0,0.13517866532007852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,0.434933344523112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,64,128,1,fp8,fp8,0,0.12956266601880392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,0.43456534544626874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,0.40292267004648846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,float16,0,0.9095253149668375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,fp8,0,0.9157280127207438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,64,128,1,fp8,fp8,0,0.8432373205820719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,float16,0,0.9247679710388184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,fp8,0,0.9333706696828207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,64,128,1,fp8,fp8,0,0.8648586273193359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,3.266074816385905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,float16,0,0.9418880144755045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,3.0144478480021157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,3.2636000315348306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,3.275573412577311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,fp8,0,0.9519573052724203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,3.291253407796224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,64,128,1,fp8,fp8,0,0.8846933046976725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,3.0395307540893555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,float16,0,0.9737599690755209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,3.301520029703776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,fp8,0,0.9836586316426595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,64,128,1,fp8,fp8,0,0.9231253465016683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,float16,0,0.529914657274882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,3.0583359400431314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,3.303039868672689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,3.3418025970458984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,fp8,0,0.5393653313318888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,64,128,1,fp8,fp8,0,0.5096266667048136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,1.7450133959452312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,3.347205479939779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,float16,0,0.4673493305842082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,3.0995521545410156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,1.7530986467997234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,fp8,0,0.4726666609446208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,1.628991921742757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,64,128,1,fp8,fp8,0,0.43746666113535565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,float16,0,0.47297600905100506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,1.6707040468851726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,fp8,0,0.47759465376536053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,64,128,1,fp8,fp8,0,0.4441386858622233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,1.6752746899922688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,1.5534666379292805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,float16,0,0.48124265670776367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,1.6772853533426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,fp8,0,0.4861760139465332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,64,128,1,fp8,fp8,0,0.4527146816253662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,1.558725357055664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,1.6856533686319988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,float16,0,0.4949920177459717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,1.6891147295633953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,fp8,0,0.5025120178858439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,1.69705597559611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,64,128,1,fp8,fp8,0,0.4700266520182292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,float16,0,0.281333327293396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,1.5685067176818848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,fp8,0,0.28922667105992633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,1.7076212565104167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,64,128,1,fp8,fp8,0,0.27447466055552167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,0.9178773562113444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,float16,0,0.24902933835983276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,1.7150506973266602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,1.5828693707784016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,0.9247732957204183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,0.8585120042165121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,fp8,0,0.25121599435806274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,float16,0,0.2510773340861003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,64,128,1,fp8,fp8,0,0.23829332987467447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,0.8812106450398763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,0.8845600287119547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,0.8238879839579264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,fp8,0,0.25283199548721313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,64,128,1,fp8,fp8,0,0.24162666002909342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,0.8842293421427408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,float16,0,0.2581546703974406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,0.8846453030904134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,0.8241653442382812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,fp8,0,0.2604373296101888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,64,128,1,fp8,fp8,0,0.24679466088612875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,0.8920000394185384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,0.8962666988372803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,float16,0,0.26579199234644574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,0.831706682840983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,fp8,0,0.2688746651013692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,64,128,1,fp8,fp8,0,0.25467199087142944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,0.899498701095581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,0.5092800060907999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,float16,0,0.15955199797948202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,fp8,0,0.16433599591255188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,0.9045813083648682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,0.8403147061665853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,64,128,1,fp8,fp8,0,0.1581706702709198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,float16,0,0.13809600472450256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,0.5130560000737509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,0.47812267144521076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,fp8,0,0.1409226655960083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,float16,0,0.14006400108337402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,0.4853813250859578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,fp8,0,0.14205333590507507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,64,128,1,fp8,fp8,0,0.13381866614023843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,0.4874933163324992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,0.45153601964314777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,0.48788265387217206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,64,128,1,fp8,fp8,0,0.1353333294391632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,0.4888960123062134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,float16,0,0.14421332875887552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,0.45631468296051025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,fp8,0,0.14517866571744284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,0.49127999941507977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,64,128,1,fp8,fp8,0,0.1421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,0.49354668458302814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,float16,0,0.15033066272735596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,0.4615039825439453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,0.49984534581502277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,fp8,0,0.1523253321647644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.3141813278198242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,0.4979360103607178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,64,128,1,fp8,fp8,0,0.14891733725865683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,float16,0,0.11333333452542622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.31546666224797565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,0.29206399122873944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,0.46727466583251953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,fp8,0,0.1123306651910146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.31070399284362793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,64,128,1,fp8,fp8,0,0.10776000221570332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,float16,0,0.10922132929166158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,float16,0,0.11122133334477742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,fp8,0,0.11146666606267293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.3108746608098348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,64,128,1,fp8,fp8,0,0.10542933146158855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,0.31115732590357464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,0.2914399902025859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,fp8,0,0.10940800110499065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,64,128,1,fp8,fp8,0,0.10521066188812256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,0.31214932600657147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,0.28944534063339233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,float16,0,0.10945066809654236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,fp8,0,0.111135999361674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.31191466252009076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,64,128,1,fp8,fp8,0,0.10529067118962605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.3123520016670227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,float16,0,0.11141866445541382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,0.2906720042228699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.3114933371543884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,fp8,0,0.1109386682510376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,64,128,1,fp8,fp8,0,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.31301865975062054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,0.29131199916203815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,float16,0,0.6814666589101156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,fp8,0,0.6880319913228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,64,128,1,fp8,fp8,0,0.6308159828186035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,float16,0,0.6906239986419678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,1.9849546750386555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,fp8,0,0.6967360178629557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,64,128,1,fp8,fp8,0,0.6437973181406657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,1.9860426584879558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,1.8329013188680012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,float16,0,0.7033653259277344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,1.99019193649292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,fp8,0,0.7108159859975179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,64,128,1,fp8,fp8,0,0.6594346761703491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,1.8512427012125652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,1.997968037923177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,2.012330691019694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,float16,0,0.7244640191396078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,fp8,0,0.7321066856384277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,64,128,1,fp8,fp8,0,0.6899466514587402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,2.012949307759603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,1.8676160176595051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,float16,0,0.3981279929478963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,2.0351413091023765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,fp8,0,0.40814932187398273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,64,128,1,fp8,fp8,0,0.3851413329442342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,1.0779039859771729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,2.0449546178181968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,float16,0,0.35254931449890137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,1.8989866574605305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,1.0883306662241619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,1.0083146890004475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,fp8,0,0.3551093339920044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,64,128,1,fp8,fp8,0,0.3323253393173218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,1.0261867046356201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,float16,0,0.35652267932891846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,fp8,0,0.3611413240432739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,1.0304853121439617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,0.9542506535847982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,64,128,1,fp8,fp8,0,0.3376213312149048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,1.030400037765503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,float16,0,0.36372268199920654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,1.0314666430155437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,fp8,0,0.36933334668477374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,0.960757335027059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,64,128,1,fp8,fp8,0,0.3444853226343791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,1.0395786762237549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,float16,0,0.37532798449198407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,fp8,0,0.3813653389612834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,1.0422773361206055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,0.968559980392456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,64,128,1,fp8,fp8,0,0.35684800148010254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,1.0493760108947754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,fp8,0,0.2222773234049479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,1.0595893065134685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,float16,0,0.21760533253351846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,0.5772906541824341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,64,128,1,fp8,fp8,0,0.2113706668217977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,0.9785333474477133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,float16,0,0.18949333826700845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,0.5819786787033081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,0.5420426527659098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,fp8,0,0.19129600127538046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,64,128,1,fp8,fp8,0,0.18449066082636514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,0.5482079982757568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,fp8,0,0.19428799549738565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,0.5488213300704956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,float16,0,0.19124799966812134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,0.5123999913533529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,0.5512959957122803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,0.5475360155105591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,fp8,0,0.19910933574040732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,64,128,1,fp8,fp8,0,0.18491200606028238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,float16,0,0.19753599166870117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,0.514853318532308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,64,128,1,fp8,fp8,0,0.18986666202545166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,0.5560746590296427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,0.5570133527119955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,float16,0,0.20364266633987427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,0.5187946557998657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,fp8,0,0.20734934012095133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,0.5621600151062012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,64,128,1,fp8,fp8,0,0.19701866308848062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,float16,0,0.125408003727595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,0.567792018254598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,0.527237335840861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,0.3285386761029561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,0.32441065708796185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,fp8,0,0.12786133090655008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,64,128,1,fp8,fp8,0,0.1253973344961802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,0.30982400973637897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,float16,0,0.11136000355084737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,fp8,0,0.1127946674823761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.3091520071029663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,64,128,1,fp8,fp8,0,0.10524800419807434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,float16,0,0.11108266313870747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,0.3107893268267314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,0.2879306674003601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,fp8,0,0.11339199542999268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,0.31017067035039264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,64,128,1,fp8,fp8,0,0.10520000259081523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.311514675617218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,0.28958932558695477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,float16,0,0.11215466260910034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,0.2914453347524007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.31273066997528076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,fp8,0,0.11446932951609294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,fp8,0,0.11923199892044067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,64,128,1,fp8,fp8,0,0.10941333572069804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.3121386567751567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,0.3195786674817403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,float16,0,0.11897066235542297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.20745599269866943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.3164213299751282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,64,128,1,fp8,fp8,0,0.11642133196194966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,float16,0,0.0867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,0.30073599020640057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,fp8,0,0.08519466718037923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.20706667502721152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,64,128,1,fp8,fp8,0,0.08309866487979889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,64,128,1,fp8,fp8,0,0.08354133367538452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.20722667376200357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.19362666209538779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,float16,0,0.08646933237711589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,fp8,0,0.08654399712880452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.20669867595036825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.19346133867899576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,float16,0,0.08654399712880452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.19322667519251505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.20751466353734335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,fp8,0,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,64,128,1,fp8,fp8,0,0.08303466439247131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.20702399810155234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,float16,0,0.08662399649620056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.20736000935236612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,fp8,0,0.08649067083994548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,fp8,0,0.08646933237711589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,64,128,1,fp8,fp8,0,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.20765332380930582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.20786132415135702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.19164266188939413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,float16,0,0.086517333984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.2079306642214457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,64,128,1,fp8,fp8,0,0.08266133566697438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.1930933396021525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,float16,0,0.897653341293335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,fp8,0,0.905951976776123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,64,128,1,fp8,fp8,0,0.8326826890309652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,1.9882346789042156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,float16,0,0.9147466818491617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,fp8,0,0.9205599625905355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,64,128,1,fp8,fp8,0,0.8528746763865153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,1.9961973826090496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,1.8402560551961262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,2.0076427459716797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,float16,0,0.934117317199707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,2.014026641845703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,fp8,0,0.9409493605295817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,64,128,1,fp8,fp8,0,0.8738613128662109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,1.8619413375854492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,2.023237387339274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,float16,0,0.9637227058410645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,2.02947727839152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,fp8,0,0.9726026852925619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,64,128,1,fp8,fp8,0,0.9102773666381836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,1.8855093320210774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,float16,0,0.5178399880727133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,2.062122662862142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,64,128,1,fp8,fp8,0,0.49996264775594074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,fp8,0,0.528719981511434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,1.0800906817118328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,2.0677706400553384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,1.9190452893575032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,1.093839963277181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,float16,0,0.4561440149943034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,1.0170186360677083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,fp8,0,0.4601653416951497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,64,128,1,fp8,fp8,0,0.42577600479125977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,1.0169119834899902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,float16,0,0.4612533251444499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,1.0162773132324219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,0.9449333349863688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,fp8,0,0.4663039843241374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,64,128,1,fp8,fp8,0,0.43347732226053876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,1.0184266567230225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,float16,0,0.4697386821111043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,1.0255786577860515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,0.9504533608754476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,fp8,0,0.47628267606099445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,64,128,1,fp8,fp8,0,0.44332265853881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,1.031061331431071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,float16,0,0.4844906727472941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,1.033392031987508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,0.9566880067189535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,fp8,0,0.4926079909006755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,64,128,1,fp8,fp8,0,0.45926400025685626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,1.0464853445688884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,float16,0,0.2709386746088664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,fp8,0,0.2769013245900472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,1.053818702697754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,0.565941333770752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,0.9773920377095541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,64,128,1,fp8,fp8,0,0.26268800099690753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,0.57423468430837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,float16,0,0.23586666584014893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,0.5347893238067627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,fp8,0,0.23907732963562012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,0.5308320124944051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,64,128,1,fp8,fp8,0,0.22740799188613892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,0.5337173144022623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,float16,0,0.2400266726811727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,0.4994080066680908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,0.5310293436050415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,fp8,0,0.24262400468190512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,64,128,1,fp8,fp8,0,0.22990399599075317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,0.5411520004272461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,64,128,1,fp8,fp8,0,0.23432532946268717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,0.5366826852162679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,float16,0,0.24619199832280478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,0.5025173425674438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,fp8,0,0.24921067555745444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,0.5436160167058309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,float16,0,0.2541813254356384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,0.5066560109456381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,fp8,0,0.25860265890757245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,0.5486346483230591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,64,128,1,fp8,fp8,0,0.24412800868352255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,0.555402676264445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,float16,0,0.14663466811180115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,0.3141653339068095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,0.5165119965871176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,0.3110293348630269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,fp8,0,0.1502079963684082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,64,128,1,fp8,fp8,0,0.12124266227086385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,64,128,1,fp8,fp8,0,0.14595199624697366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,0.29499733448028564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,float16,0,0.1253546675046285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,fp8,0,0.12786666552225748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.28683199485143024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.29078400135040283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,float16,0,0.12754666805267334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,0.2694186568260193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,fp8,0,0.1284213364124298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.28987733523050946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.29233066240946454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,64,128,1,fp8,fp8,0,0.12296000123023987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.29101866483688354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.2956479986508687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,0.27186665932337445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,float16,0,0.13150933384895325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,fp8,0,0.13193066914876303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,64,128,1,fp8,fp8,0,0.1276853382587433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,0.27754666407903034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,float16,0,0.13736533125241598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.29972267150878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,fp8,0,0.13808533549308777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,64,128,1,fp8,fp8,0,0.13398399949073792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.30236266056696576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,0.2840213378270467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,float16,0,0.08475733796755473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,float16,0,0.07831466694672902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.1794346570968628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,fp8,0,0.088837335507075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,64,128,1,fp8,fp8,0,0.08749866485595703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.18228266636530557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.1745120088259379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,float16,0,0.0787360022465388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.17292799552281699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.17253865798314413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,fp8,0,0.07868800063927968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,64,128,1,fp8,fp8,0,0.0747680018345515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.1739306648572286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.16057067116101584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,fp8,0,0.08056533336639404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,64,128,1,fp8,fp8,0,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.1732800006866455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.1613759994506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,float16,0,0.08060266574223836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.17462400595347086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.1625866691271464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.17511999607086182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,64,128,1,fp8,fp8,0,0.08041066428025563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,64,128,1,fp8,fp8,0,0.07631466786066692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.17532267173131308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,float16,0,0.08250666658083598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,fp8,0,0.08370133241017659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.17633599042892456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.16642666856447855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,float16,0,0.061861331264177956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.12403200070063274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,fp8,0,0.06192000210285187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,64,128,1,fp8,fp8,0,0.06006399790445963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.1253493328889211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.1167680025100708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.12380799651145935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,float16,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.12338133653004964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,fp8,0,0.06187200049559275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,fp8,0,0.0620000014702479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,64,128,1,fp8,fp8,0,0.059343998630841575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.11761066317558289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,float16,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.12562132875124613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,64,128,1,fp8,fp8,0,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.12380799651145935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.11737066507339478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.12546666463216147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,fp8,0,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,fp8,0,0.062234664956728615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,64,128,1,fp8,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.12566399574279785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.11728533109029134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,float16,0,0.06124266485373179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,64,128,1,fp8,fp8,0,0.059658666451772056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.12571733196576437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.11756267150243123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,float16,0,0.674394687016805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,fp8,0,0.6807413101196289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,64,128,1,fp8,fp8,0,0.6239360173543295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,1.259434700012207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,float16,0,0.686896006266276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,1.2630186875661213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,1.1650559902191162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,fp8,0,0.6896586418151855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,64,128,1,fp8,fp8,0,0.6389333407084147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,1.2666827042897542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,float16,0,0.6992373466491699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,1.2706720034281414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,1.1813120047251384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,fp8,0,0.7039413452148438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,64,128,1,fp8,fp8,0,0.6552586555480957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,1.2844959894816081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,1.2869173685709636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,float16,0,0.7228853702545166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,1.1954560279846191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,fp8,0,0.7276906967163086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,64,128,1,fp8,fp8,0,0.6814400355021158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,float16,0,0.3925653298695882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,1.3064160346984863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,fp8,0,0.39974931875864667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,64,128,1,fp8,fp8,0,0.37726934750874835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,0.6983199914296468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,1.3103040059407551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,float16,0,0.3434986670811971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,1.225061337153117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,0.7040800253550211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,0.6580319801966349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,fp8,0,0.34698665142059326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,64,128,1,fp8,fp8,0,0.32416532437006634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,0.6445279916127523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,0.6032906770706177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,0.649125337600708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,float16,0,0.34889066219329834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,fp8,0,0.35259199142456055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,0.6551253398259481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,0.6092693408330282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,0.6500693162282308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,64,128,1,fp8,fp8,0,0.3299093246459961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,float16,0,0.35674134890238446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,fp8,0,0.3606880108515422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,0.6638506650924683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,0.65993599096934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,64,128,1,fp8,fp8,0,0.3373546600341797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,fp8,0,0.37291733423868817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,float16,0,0.36880000432332355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,0.6168533166249593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,0.6726453304290771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,64,128,1,fp8,fp8,0,0.3487360080083211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,float16,0,0.20971200863520303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,0.6758026281992594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,0.6294026772181193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,0.37495466073354083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,0.3691733280817668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,fp8,0,0.21413866678873697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,64,128,1,fp8,fp8,0,0.2036479910214742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,0.35155200958251953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,float16,0,0.17893334229787192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,0.34006933371225995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,fp8,0,0.18107734123865762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,64,128,1,fp8,fp8,0,0.17389333248138428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,0.3416373332341512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,0.32238932450612384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,float16,0,0.1830880045890808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,0.3404906590779622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,fp8,0,0.1848319967587789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,64,128,1,fp8,fp8,0,0.17652799685796103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,0.34280534585316974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,float16,0,0.18701332807540894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,0.32420265674591064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,0.34727466106414795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,fp8,0,0.19088532527287802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,64,128,1,fp8,fp8,0,0.1811466614405314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,0.3500959873199463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,0.3306613365809123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,float16,0,0.19511467218399048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,0.3548213243484497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,fp8,0,0.19881600141525269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,64,128,1,fp8,fp8,0,0.1890986760457357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,0.35736000537872314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,float16,0,0.11553600430488586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,0.3380213181177775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.20536533991495767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,fp8,0,0.11889599760373433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,64,128,1,fp8,fp8,0,0.11530666550000508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.20773865779240927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.19732799132665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,float16,0,0.10125866532325745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.1889280080795288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,fp8,0,0.10307733217875163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,64,128,1,fp8,fp8,0,0.09529067079226176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.19129067659378052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.17657599846522012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,float16,0,0.10101866722106934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.18916267156600952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,fp8,0,0.10328533252080281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,64,128,1,fp8,fp8,0,0.09674666325251262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.19151999553044638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.17698667446772257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,float16,0,0.10322133700052898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.19154133399327597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,fp8,0,0.10454400380452473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,fp8,0,0.10928533474604289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,64,128,1,fp8,fp8,0,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,64,128,1,fp8,fp8,0,0.10716799894968669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.1938986579577128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.1809920072555542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,float16,0,0.10698666175206502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.19513066609700522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.19801600774129233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.18918399016062418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,float16,0,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.12176000078519185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.11813867092132568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,fp8,0,0.07036266724268596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,64,128,1,fp8,fp8,0,0.0680213322242101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.12370666861534119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.11727999647458394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,float16,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,float16,0,0.06369600196679433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,fp8,0,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,64,128,1,fp8,fp8,0,0.05990933378537496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.11963733037312825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.11116799712181091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.11759466926256816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,fp8,0,0.06368533273537953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,64,128,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.11875733733177185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.11989866693814595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.11108799775441487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.11116266250610352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,float16,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.11986133456230164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,64,128,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,fp8,0,0.06389866769313812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,64,128,1,fp8,fp8,0,0.061568001906077065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,float16,0,0.06417599817117055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.12029866377512614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,fp8,0,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.12158933281898499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.11437333623568217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,float16,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,float16,0,0.05375466744105021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.08869866530100505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,64,128,1,fp8,fp8,0,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,64,128,1,fp8,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.08878933389981587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.08892266949017842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.08860799670219421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.08442667126655579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.08272000153859456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,float16,0,0.052069331208864846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,64,128,1,fp8,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.08852266271909077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,float16,0,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,float16,0,0.05365866422653198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.08892800410588582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,64,128,1,fp8,fp8,0,0.049882665276527405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.08782399694124858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.08267733454704285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.08760000268618266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,64,128,1,fp8,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.08876799543698628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.08265600105126698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,float16,0,0.8945279916127523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,fp8,0,0.9000746409098307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,64,128,1,fp8,fp8,0,0.825813372929891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,1.3439520200093586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,float16,0,0.9103306929270426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,1.353109359741211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,1.2496533393859863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,fp8,0,0.9200106461842855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,64,128,1,fp8,fp8,0,0.8401226997375488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,1.36407470703125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,1.3658026059468586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,float16,0,0.9279893239339193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,1.2645546595255535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,fp8,0,0.9352959791819254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,64,128,1,fp8,fp8,0,0.8610613346099854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,1.3842132886250813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,1.388261318206787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,float16,0,0.9523999691009521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,1.2828959623972576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,fp8,0,0.9639039834340414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,64,128,1,fp8,fp8,0,0.9025973478953043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,1.4094719886779785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,float16,0,0.5158720016479492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,1.4180800120035808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,0.7512213389078776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,1.3261813322703044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,fp8,0,0.5269759893417358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,0.7632479667663574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,64,128,1,fp8,fp8,0,0.4984480142593384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,float16,0,0.45158934593200684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,0.7175306479136149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,fp8,0,0.45577065149943036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,0.681658665339152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,64,128,1,fp8,fp8,0,0.4219093322753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,0.6870720386505127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,0.689466635386149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,fp8,0,0.463648001352946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,0.6368639866511027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,float16,0,0.458624005317688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,64,128,1,fp8,fp8,0,0.4296799898147583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,0.6931680043538412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,0.6459733247756958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,float16,0,0.46694934368133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,fp8,0,0.4718133211135864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,0.7005493640899658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,64,128,1,fp8,fp8,0,0.4389866590499878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,0.7041599750518799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,0.6556426684061686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,float16,0,0.4805813233057658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,fp8,0,0.487008015314738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,0.7136693000793457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,64,128,1,fp8,fp8,0,0.4551466703414917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,fp8,0,0.2733493248621623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,float16,0,0.2666986584663391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,0.7220426400502523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,0.6710240046183268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,0.3911679983139038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,64,128,1,fp8,fp8,0,0.25896533330281574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,0.3979733387629191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,float16,0,0.23154133558273315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,0.37326399485270184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,0.3553866545359294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,0.3529706796010335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,fp8,0,0.2334666649500529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,0.35660799344380695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,64,128,1,fp8,fp8,0,0.2216213345527649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,0.3349279959996541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,float16,0,0.23406400283177695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,float16,0,0.2408533294995626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,fp8,0,0.23624533414840698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,64,128,1,fp8,fp8,0,0.22431466976801553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,0.3595893383026123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,0.33776533603668213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,0.36259734630584717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,fp8,0,0.24439465999603271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,64,128,1,fp8,fp8,0,0.23125867048899332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,0.3661653200785319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,0.3452479839324951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,float16,0,0.24928534030914307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,0.37328533331553143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,fp8,0,0.2540266712506612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,64,128,1,fp8,fp8,0,0.23690134286880493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,0.3759466807047526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,float16,0,0.14193600416183472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.21439999341964722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,0.3533173402150472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.21204266945521036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,fp8,0,0.1448746621608734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,64,128,1,fp8,fp8,0,0.14123732844988504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.20388267437616983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,float16,0,0.11946666240692139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.18754667043685913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,fp8,0,0.12177067001660664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,64,128,1,fp8,fp8,0,0.11514666676521301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.18922666708628336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.17831466595331827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,float16,0,0.12036800384521484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.18967467546463013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,fp8,0,0.12339733044306438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,64,128,1,fp8,fp8,0,0.11877333124478658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.19097065925598145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.17905600865681967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,float16,0,0.12588799993197122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.19405333201090494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,fp8,0,0.12731200456619263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,float16,0,0.13024533788363138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,64,128,1,fp8,fp8,0,0.12344533205032349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.19574934244155884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.18677333990732828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.19944000244140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,fp8,0,0.13365866740544638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,64,128,1,fp8,fp8,0,0.1301866670449575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.1930453379948934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.12173333764076233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.20237332582473755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,float16,0,0.07994133234024048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.1113813320795695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.11917866269747417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,fp8,0,0.08084266881148021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.11136000355084737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,64,128,1,fp8,fp8,0,0.0809333324432373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.11879466970761617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,float16,0,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,64,128,1,fp8,fp8,0,0.06840533514817555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.1051093339920044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,float16,0,0.07223466535409291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.11117866635322571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,fp8,0,0.07357866565386455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,64,128,1,fp8,fp8,0,0.06945066650708516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.11317333579063416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.10505599776903789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,float16,0,0.0729066679875056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,float16,0,0.0753119985262553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.11319466431935628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,64,128,1,fp8,fp8,0,0.07076799869537354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.11421333750089009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.10697600245475769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.11519466837247212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,fp8,0,0.07629866898059845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,64,128,1,fp8,fp8,0,0.07260799904664357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.11728533109029134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.07665599882602692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.11079999804496765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,float16,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.07668800155321757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,fp8,0,0.051258668303489685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,64,128,1,fp8,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.07267199953397115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,float16,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.07461333274841309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.07241599758466084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,64,128,1,fp8,fp8,0,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,64,128,1,fp8,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.07464000085989635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.07018133501211803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,float16,0,0.047637333472569786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,fp8,0,0.04651733239491781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.07014933228492737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,float16,0,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,fp8,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,64,128,1,fp8,fp8,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.07419733206431071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.07625600198904674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.07022933165232341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,float16,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,fp8,0,0.04986133178075155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,64,128,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,64,128,1,fp8,fp8,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.07043200234572093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,float16,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.05468266705671946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,64,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.05163733164469401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,float16,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.055248002211252846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,64,128,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,float16,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.05505066613356272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.054469332098960876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,64,128,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,float16,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,64,128,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.05518933137257894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.05165866514046987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,float16,0,0.6862186590830485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,fp8,0,0.6893333594004313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,64,128,1,fp8,fp8,0,0.6322720050811768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,0.9117973645528158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,0.9138453006744385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,0.8429120381673177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,float16,0,0.7050933043162028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,64,128,1,fp8,fp8,0,0.6444053252538046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,0.9299413363138834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,fp8,0,0.7088373502095541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,0.9320480028788248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,0.8552959760030111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,float16,0,0.7209440072377523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,fp8,0,0.7209440072377523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,0.9444373448689779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,64,128,1,fp8,fp8,0,0.6603893438975016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,0.946997324625651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,0.869482676188151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,float16,0,0.7480800151824951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,64,128,1,fp8,fp8,0,0.6834133466084799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,fp8,0,0.7416853109995524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,0.9662986596425375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,float16,0,0.3997386693954468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,0.9659039974212646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,0.5168906847635905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,0.8945333162943522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,fp8,0,0.4017866849899292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,64,128,1,fp8,fp8,0,0.3806826670964559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,0.5220693349838257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,0.48924799760182697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,float16,0,0.34460266431172687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,0.4628746509552002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,0.4601973295211792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,fp8,0,0.34725332260131836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,64,128,1,fp8,fp8,0,0.3237226605415344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,0.4321440060933431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,float16,0,0.35044801235198975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,0.4652213255564372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,fp8,0,0.3530133167902629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,64,128,1,fp8,fp8,0,0.3296213348706563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,0.467029333114624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,0.43839999039967853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,float16,0,0.3601226806640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,0.4755680163701375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,fp8,0,0.36245866616566974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,64,128,1,fp8,fp8,0,0.33744001388549805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,0.47925865650177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,fp8,0,0.3744853337605794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,0.44731732209523517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,float16,0,0.3703626791636149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,0.48687465985616046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,64,128,1,fp8,fp8,0,0.3491199811299642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,0.4915786584218343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,float16,0,0.207370658715566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,0.4573119878768921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,0.27114667495091754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,fp8,0,0.2118133306503296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,64,128,1,fp8,fp8,0,0.20055466890335083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,0.27404266595840454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,0.25812800725301105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,float16,0,0.17625067631403604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.2360159953435262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,fp8,0,0.177130659421285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.23839465777079263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,64,128,1,fp8,fp8,0,0.17017066478729248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.2387626568476359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.22778133551279703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,float16,0,0.17787732680638632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,fp8,0,0.18101867039998373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,64,128,1,fp8,fp8,0,0.17250667015711466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.2416106661160787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.2308853268623352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,float16,0,0.183786670366923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.23633599281311035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,float16,0,0.1937333345413208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.24592532714207968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,fp8,0,0.19543999433517456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,fp8,0,0.18625599145889282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,64,128,1,fp8,fp8,0,0.18523732821146646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,64,128,1,fp8,fp8,0,0.1786186695098877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.2487199902534485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.2558559974034627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.25860265890757245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,0.24240533510843912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,float16,0,0.11141866445541382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.1477226714293162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,fp8,0,0.11488533020019531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,64,128,1,fp8,fp8,0,0.1111893355846405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.14845333496729532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.14407466848691305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,float16,0,0.09662933150927226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.12184000015258789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.1306719978650411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,64,128,1,fp8,fp8,0,0.09065066774686177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.13167466719945273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,float16,0,0.09680533409118652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.13157866398493448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,fp8,0,0.09689066807428996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,64,128,1,fp8,fp8,0,0.0906986693541209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,fp8,0,0.09985599915186565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.13332800070444742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.12372266252835591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,float16,0,0.0976746678352356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.13292266925175986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.13569600383440653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,64,128,1,fp8,fp8,0,0.09462933739026387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.13362133502960205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.1269599994023641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,float16,0,0.1018986701965332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,fp8,0,0.10379733641942342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,64,128,1,fp8,fp8,0,0.10280533631642659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.1392159958680471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.1346666713555654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,float16,0,0.062458669145902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,fp8,0,0.06452266871929169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,64,128,1,fp8,fp8,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,64,128,1,fp8,fp8,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.08343467116355896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,float16,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.0794293334086736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.08024533092975616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,fp8,0,0.05823466678460439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.08118399977684021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.0764213353395462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.07620800038178761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,float16,0,0.05845866600672404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,fp8,0,0.05823466678460439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,64,128,1,fp8,fp8,0,0.055626665552457176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.0811466674009959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,float16,0,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.08076266447703044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,64,128,1,fp8,fp8,0,0.05709866682688395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.07620800038178761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,float16,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.08250666658083598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,fp8,0,0.061941335598627724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,64,128,1,fp8,fp8,0,0.058634668588638306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,64,128,1,fp8,fp8,0,0.04273599882920583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.0825973351796468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.07864533364772797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,float16,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.056794668237368263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,fp8,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.056549335519472756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.0537066658337911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,float16,0,0.04161600023508072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.054192001620928444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,fp8,0,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,64,128,1,fp8,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.05522666871547699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,fp8,0,0.04252266883850098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,64,128,1,fp8,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,float16,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,64,128,1,fp8,fp8,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,64,128,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.054341331124305725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.05243200063705444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,float16,0,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.05588266750176748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,fp8,0,0.04316799839337667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,fp8,0,0.03312533348798752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,64,128,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,64,128,1,fp8,fp8,0,0.031141333281993866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,float16,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,fp8,0,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,64,128,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.04470400015513102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,64,128,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.04278400043646494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.04426133135954539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,64,128,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,float16,0,0.8178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,0.9590293566385905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,fp8,0,0.8122506936391195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,64,128,1,fp8,fp8,0,0.7655466397603353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,0.952239990234375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,0.8997120062510172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,float16,0,0.8217600186665853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,0.9641119639078776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,fp8,0,0.8197759787241617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,64,128,1,fp8,fp8,0,0.7597333590189616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,0.962224006652832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,0.8959253629048666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,float16,0,0.8396480083465576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,0.9802186489105225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,fp8,0,0.8327999909718832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,64,128,1,fp8,fp8,0,0.8774666786193848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,0.9815946420033773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,float16,0,0.8211999734242758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,1.0230666796366374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,fp8,0,0.8132800261179606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,0.9639573097229004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,64,128,1,fp8,fp8,0,0.859386682510376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,float16,0,0.43248534202575684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,0.9510826269785563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,0.5093066692352295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,fp8,0,0.4267093340555827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,0.502623995145162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,0.999946673711141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,64,128,1,fp8,fp8,0,0.43186132113138836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,0.4989706675211589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,64,128,1,fp8,fp8,0,0.38712000846862793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,float16,0,0.4188213348388672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,0.4901119867960612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,fp8,0,0.4177279869715373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,0.4889119863510132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,0.4556639989217122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,float16,0,0.42051732540130615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,0.49405332406361896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,fp8,0,0.4188479979832967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,64,128,1,fp8,fp8,0,0.39082666238149005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,0.4928906758626302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,0.4622986714045207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,float16,0,0.4288426637649536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,0.502133329709371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,fp8,0,0.4280266761779785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,64,128,1,fp8,fp8,0,0.4288533528645833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,0.5005706548690796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,0.5021493434906006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,float16,0,0.4198506673177083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,0.4964053233464559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,0.4882346789042155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,fp8,0,0.41577064990997314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,64,128,1,fp8,fp8,0,0.4205973148345947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,float16,0,0.22557334105173746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,0.49351998170216876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,0.265066663424174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,fp8,0,0.22100265820821127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,64,128,1,fp8,fp8,0,0.22500266631444296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,64,128,1,fp8,fp8,0,0.19975467522939047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,0.2616320053736369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,0.2606933315594991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,float16,0,0.21991999944051108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,float16,0,0.216538667678833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.2581706643104553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.2560853362083435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.254037340482076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,fp8,0,0.21683200200398764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.23794132471084595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,fp8,0,0.21793599923451742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,64,128,1,fp8,fp8,0,0.20345600446065268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.25750933090845746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.24274667104085287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,float16,0,0.22445867458979288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.26295467217763263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,fp8,0,0.22359466552734375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,64,128,1,fp8,fp8,0,0.2158986727396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.26239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.2525706688563029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,float16,0,0.21925866603851318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.2589226762453715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,fp8,0,0.21759466330210367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,64,128,1,fp8,fp8,0,0.21394666035970053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.2563680013020833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,0.2532106637954712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,float16,0,0.12150399883588155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.1439253290494283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,fp8,0,0.11935999989509583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,64,128,1,fp8,fp8,0,0.12203733126322429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.1418880025545756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.14203199744224548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,float16,0,0.11520533760388692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.13467199603716531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,fp8,0,0.11320533355077107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,64,128,1,fp8,fp8,0,0.10738133390744527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,fp8,0,0.11525866389274597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.13541332880655924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.1283253331979116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.12949867049853006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,float16,0,0.11668800314267476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.13733333349227905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.14006933569908142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,64,128,1,fp8,fp8,0,0.10915199915568034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.13609600067138672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,float16,0,0.11746133367220561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,fp8,0,0.11890133221944173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,64,128,1,fp8,fp8,0,0.11541866262753804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.13827733198801676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.13588266571362814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,float16,0,0.11820266644159953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,fp8,0,0.11756799618403117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.07838400204976399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.14004266262054443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,64,128,1,fp8,fp8,0,0.11405332883199056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.13766933480898538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.13523733615875244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,float16,0,0.06637866795063019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,fp8,0,0.06716800232728322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,64,128,1,fp8,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,64,128,1,fp8,fp8,0,0.06156266729036967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.07855999966462453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.08183999856313069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,float16,0,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.07655466596285503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,64,128,1,fp8,fp8,0,0.06196799874305725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.07665599882602692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,float16,0,0.06579733391602834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.07764266431331635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,fp8,0,0.06421866516272227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.07429333527882893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,float16,0,0.066021333138148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.07870399951934814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,64,128,1,fp8,fp8,0,0.06403199831644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,64,128,1,fp8,fp8,0,0.06440000236034393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.07512533167997996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,float16,0,0.06449600060780843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.07828799883524577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,fp8,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,64,128,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.05157866577307383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.07634666562080383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,float16,0,0.04135466615358988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,float16,0,0.04045333216587702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.05060799916585287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,64,128,1,fp8,fp8,0,0.0383146678407987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,64,128,1,fp8,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.048112000028292336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,float16,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.05161599814891815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.05161066850026449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,float16,0,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,64,128,1,fp8,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.05175999800364176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,64,128,1,fp8,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,64,128,1,fp8,fp8,0,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.051141331593195595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.051221330960591636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,fp8,0,0.02698666602373123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,float16,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.03221333275238673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,64,128,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,float16,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.03254933406909307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,64,128,1,fp8,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,float16,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,64,128,1,fp8,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,float16,0,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,fp8,0,0.02269333352645238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.028442665934562683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,float16,0,0.7904106775919596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,0.8012266953786215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,fp8,0,0.7856372992197672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,64,128,1,fp8,fp8,0,0.7364959716796875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,0.7946613629659017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,0.7516533533732096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,float16,0,0.7991893291473389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,0.8066293398539225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,fp8,0,0.7921813329060873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,64,128,1,fp8,fp8,0,0.7369013627370199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,0.7996160189310709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,0.7474079926808676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,float16,0,0.8187626997629801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,0.8249173164367676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,fp8,0,0.8147573471069336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,64,128,1,fp8,fp8,0,0.8634880383809408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,0.815333366394043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,0.8751359780629476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,float16,0,0.7965760231018066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,0.8071466286977133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,fp8,0,0.7879839738210043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,float16,0,0.41783467928568524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,64,128,1,fp8,fp8,0,0.846448024113973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,0.795525312423706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,0.42574934164683026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,0.8545119762420654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,fp8,0,0.4127360184987386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,64,128,1,fp8,fp8,0,0.4174773295720418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,0.4193386634190877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,0.42499732971191406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,float16,0,0.40356266498565674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,0.40934399763743085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,fp8,0,0.40318934122721356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,float16,0,0.4097919861475627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,64,128,1,fp8,fp8,0,0.37413867314656574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,0.40649600823720294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,0.38094933827718097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,0.41211732228597003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,fp8,0,0.40651198228200275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,64,128,1,fp8,fp8,0,0.3795040051142375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,0.41139201323191327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,0.3854026794433594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,float16,0,0.416159987449646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,0.4215786854426066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,fp8,0,0.41441599527994794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,64,128,1,fp8,fp8,0,0.42165335019429523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,0.4190773169199626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,0.4264479875564575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,fp8,0,0.40386664867401123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,64,128,1,fp8,fp8,0,0.4133760134379069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,float16,0,0.4082506497701009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,0.41139201323191327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,0.40675199031829834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,float16,0,0.21837866306304932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,0.41782931486765545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,0.22274132569630942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,fp8,0,0.21384533246358237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,64,128,1,fp8,fp8,0,0.21783999601999918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,0.21778666973114014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,0.22195732593536377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,float16,0,0.21126399437586466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.21188799540201822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,fp8,0,0.21075733502705893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,64,128,1,fp8,fp8,0,0.19504000743230185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.21150932709376016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,fp8,0,0.2123946746190389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,64,128,1,fp8,fp8,0,0.19908267259597778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.1989333430926005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.21419199307759604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,float16,0,0.212501327196757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.21569067239761353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.20113599300384521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,float16,0,0.21781333287556967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.21951999266942343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,fp8,0,0.21647467215855917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,64,128,1,fp8,fp8,0,0.20933866500854492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.21522132555643717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.21760000785191855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,float16,0,0.21368000904719034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.21587733427683511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,fp8,0,0.21159466107686362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,64,128,1,fp8,fp8,0,0.20987200736999512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.2135146657625834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,float16,0,0.11893866459528606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.21046932538350424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.1209386686484019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,fp8,0,0.11693867047627766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,64,128,1,fp8,fp8,0,0.11893332997957866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.11923199892044067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.12096533179283142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,float16,0,0.11121066411336263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.11338667074839275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,fp8,0,0.10909866293271382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,fp8,0,0.11108799775441487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,64,128,1,fp8,fp8,0,0.10355200370152791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.11244266231854756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.10626666744550069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,float16,0,0.11476266384124756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,float16,0,0.11180800199508667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.11541333794593811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,64,128,1,fp8,fp8,0,0.11276800433794658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,64,128,1,fp8,fp8,0,0.10600533088048299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.11422399679819743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.11310399572054546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.1162506639957428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,fp8,0,0.11332799990971883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.11729600032170613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,float16,0,0.11523733536402385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.11556266744931538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,fp8,0,0.11371733744939168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,fp8,0,0.06409599880377452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,64,128,1,fp8,fp8,0,0.11291199922561646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.11546132961908977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.11380267143249512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,float16,0,0.06473066906134288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,64,128,1,fp8,fp8,0,0.06603200236956279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,64,128,1,fp8,fp8,0,0.05996799965699514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.06400533517201741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,float16,0,0.06259199976921082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,64,128,1,fp8,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,fp8,0,0.06331199904282887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,float16,0,0.06404800216356914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.06506133576234181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,fp8,0,0.06392533580462138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.06261333326498668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,float16,0,0.06399466594060262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,64,128,1,fp8,fp8,0,0.06204266846179962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.06555733581384023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,float16,0,0.06392533580462138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.06256533165772755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,fp8,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,64,128,1,fp8,fp8,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.06253333389759064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,float16,0,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,64,128,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,float16,0,0.03931200007597605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,64,128,1,fp8,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,64,128,1,fp8,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,float16,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.043103997906049095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,64,128,1,fp8,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,float16,0,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,64,128,1,fp8,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,64,128,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.026895999908447266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,64,128,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,64,128,1,fp8,fp8,0,0.025765334566434223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,64,128,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,fp8,0,0.022287999590237934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,64,128,1,fp8,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,64,128,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,fp8,0,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.02199999988079071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,float16,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,64,128,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,64,128,1,fp8,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.02205866575241089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,float16,0,0.3773333231608073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,0.37171733379364014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,fp8,0,0.37512000401814777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,64,128,1,fp8,fp8,0,0.3476266860961914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,0.3680533170700073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,float16,0,0.3816213210423787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.34112000465393066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,fp8,0,0.38020265102386475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,0.3738880157470703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,64,128,1,fp8,fp8,0,0.353279987970988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,float16,0,0.39055999120076496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,0.3816746473312378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,0.37139201164245605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,0.3456053336461385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,fp8,0,0.38734932740529376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,64,128,1,fp8,fp8,0,0.3912800153096517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,float16,0,0.3789973258972168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,0.3798826535542806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,0.3836373488108317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,0.3714453379313151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,fp8,0,0.3750986655553182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,64,128,1,fp8,fp8,0,0.389296015103658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,0.36904533704121906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,float16,0,0.2042506734530131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,0.37753601868947345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,0.19910933574040732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,fp8,0,0.2012373407681783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,64,128,1,fp8,fp8,0,0.2053333322207133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,0.19631467262903848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,64,128,1,fp8,fp8,0,0.18184000253677368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.1901599963506063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.2009226679801941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,float16,0,0.19503466288248697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,float16,0,0.19747199614842734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.19105066855748495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,fp8,0,0.19317867358525595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.17865065733591715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.19343467553456625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,fp8,0,0.19573867321014404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,64,128,1,fp8,fp8,0,0.18449600537618002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.19399466117223105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,64,128,1,fp8,fp8,0,0.1954186757405599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.17972799142201742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,float16,0,0.20195200045903525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.1973386605580648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,fp8,0,0.2005013426144918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.1968266765276591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.1920479933420817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.1912426749865214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,float16,0,0.19751467307408652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.19337600469589233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,fp8,0,0.19477333625157675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,64,128,1,fp8,fp8,0,0.19507733980814615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.19160000483194986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,float16,0,0.10975999633471172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.10732799768447876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,fp8,0,0.10942400495211284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,64,128,1,fp8,fp8,0,0.11018133163452148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.10659733414649963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.10905599594116211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,float16,0,0.10293333729108174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.10072533289591472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,fp8,0,0.10082667072614034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,64,128,1,fp8,fp8,0,0.09479467074076335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.09910399715105693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.09297066926956177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.09672533472379048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,float16,0,0.10296533505121867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.10198400417963664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,fp8,0,0.1018346647421519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,64,128,1,fp8,fp8,0,0.09868799646695454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.10108266274134318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,float16,0,0.10670933127403259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.10393599669138591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,fp8,0,0.10528533657391866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,64,128,1,fp8,fp8,0,0.10296000043551128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.10116266210873921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,float16,0,0.1055519978205363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.10318932930628459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,fp8,0,0.10446400443712871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.059845333298047386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,fp8,0,0.060122668743133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,64,128,1,fp8,fp8,0,0.10427733262379964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.10295466581980388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.1030346651871999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,float16,0,0.05952533086140951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,float16,0,0.05990933378537496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,64,128,1,fp8,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,64,128,1,fp8,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.05470400055249532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,float16,0,0.05862933397293091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.05570666491985321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,float16,0,0.05982933441797892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.05798399945100149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,fp8,0,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,64,128,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.05978666742642721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,fp8,0,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,64,128,1,fp8,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,fp8,0,0.059765333930651345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.057802667220433555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.05778133372465769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,float16,0,0.058277333776156105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.057114665706952415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,64,128,1,fp8,fp8,0,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,float16,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,64,128,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,float16,0,0.03809600075085958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,fp8,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,64,128,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.03661333272854487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.0345920001467069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,float16,0,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,64,128,1,fp8,fp8,0,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.03626666714747747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,float16,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,float16,0,0.03844800094763438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,64,128,1,fp8,fp8,0,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,fp8,0,0.03902400036652883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,64,128,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,64,128,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,fp8,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.03730666637420654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.03636800001064936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.02402133246262868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,fp8,0,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,64,128,1,fp8,fp8,0,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.02437866727511088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,64,128,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,64,128,1,fp8,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,64,128,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,64,128,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,fp8,0,0.02073066681623459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,64,128,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,64,128,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,64,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,float16,0,0.2051680088043213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.20538665850957236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,fp8,0,0.20393067598342896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,64,128,1,fp8,fp8,0,0.19542400042215982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.20466132958730063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.19589332739512125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,float16,0,0.20830933252970377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.20842132965723673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,fp8,0,0.20669867595036825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,64,128,1,fp8,fp8,0,0.1912213365236918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.20721600453058878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.19264533122380575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,float16,0,0.21514666080474854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.21451733509699503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,fp8,0,0.21313599745432535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,64,128,1,fp8,fp8,0,0.20602667331695557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.21182399988174438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.20598934094111124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,float16,0,0.20846933126449585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.20943999290466309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,fp8,0,0.2063573400179545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,64,128,1,fp8,fp8,0,0.20867733160654703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.20594666401545206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.20589866240819296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,64,128,1,fp8,fp8,0,0.11493333180745442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,float16,0,0.11517866452534993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.11543466647466023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,fp8,0,0.11447999874750774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.11314666271209717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.11411733428637187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,float16,0,0.107232004404068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.1062506635983785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.10709333419799805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,fp8,0,0.10703466335932414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,64,128,1,fp8,fp8,0,0.10282133022944133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.10090133547782898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,float16,0,0.10930666327476501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.10777067144711812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,fp8,0,0.10777067144711812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,64,128,1,fp8,fp8,0,0.1030613382657369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.10900266965230306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.10079999764760335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,float16,0,0.11311999956766765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.11313600341478984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,fp8,0,0.1114026705423991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,64,128,1,fp8,fp8,0,0.1104693313439687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.11094933748245239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.10910933216412862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,float16,0,0.1111199955145518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.110042671362559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,fp8,0,0.110042671362559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,64,128,1,fp8,fp8,0,0.10911466677983601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.10922666390736897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.1106826663017273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,float16,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.061386664708455406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,fp8,0,0.060565332571665444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,64,128,1,fp8,fp8,0,0.06409599880377452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.059845333298047386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.05684266487757365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.06247466802597046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.058821335434913635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,fp8,0,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,64,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,float16,0,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,fp8,0,0.06165333092212677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,64,128,1,fp8,fp8,0,0.05773866673310598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,float16,0,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.05982399980227152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,64,128,1,fp8,fp8,0,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.058549334605534874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,float16,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,fp8,0,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,64,128,1,fp8,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,64,128,1,fp8,fp8,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.05886933207511902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.059104000528653465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,float16,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,64,128,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.0390133336186409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,float16,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,64,128,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.03738666574160258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,float16,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,64,128,1,fp8,fp8,0,0.03915199885765711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,float16,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,64,128,1,fp8,fp8,0,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,float16,0,0.026208000878492992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.026208000878492992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,fp8,0,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,64,128,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.026447998980681103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,64,128,1,fp8,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,fp8,0,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,64,128,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,64,128,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,float16,0,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,float16,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,64,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,float16,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,64,128,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,64,128,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,fp8,0,0.14588800072669983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,float16,0,0.1458506683508555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.1472640037536621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,64,128,1,fp8,fp8,0,0.13768000404040018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.1460693379243215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.13774933417638144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,float16,0,0.14806399742762247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.1479039986928304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,fp8,0,0.14600533246994019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,64,128,1,fp8,fp8,0,0.13802133003870645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.14782399932543436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.13800533612569174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,float16,0,0.15037332971890768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.15026666720708212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,fp8,0,0.14800533652305603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,64,128,1,fp8,fp8,0,0.14479466279347739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.14844800035158792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,float16,0,0.15000533064206442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.14985066652297974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,fp8,0,0.14959999918937683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,64,128,1,fp8,fp8,0,0.14628799756368002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.14870933691660562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.14499732851982117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,float16,0,0.08216000099976857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.0823520024617513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,fp8,0,0.08046933511892955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,64,128,1,fp8,fp8,0,0.08210666477680206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.0802400012811025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.08201600114504497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,float16,0,0.07903466622034709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,fp8,0,0.07941866914431255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,64,128,1,fp8,fp8,0,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.07863999903202057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.07426133255163829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,float16,0,0.07866133252779643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.07990399996439616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.08045333127180736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,64,128,1,fp8,fp8,0,0.07445333401362102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.0784693310658137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.07453866799672444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,float16,0,0.08066133161385854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,fp8,0,0.0783786674340566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,64,128,1,fp8,fp8,0,0.07738133271535237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.0786240001519521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,float16,0,0.08038933575153351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.07843199868996938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,fp8,0,0.07938133180141449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,64,128,1,fp8,fp8,0,0.07682666679223378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.0782773345708847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.07694399853547414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.04715733230113983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,float16,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,fp8,0,0.04757866760094961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,64,128,1,fp8,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,64,128,1,fp8,fp8,0,0.04709866642951965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,float16,0,0.04766400158405304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,float16,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,fp8,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,64,128,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.04734933376312256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,float16,0,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.04758933186531067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,fp8,0,0.047237331668535866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,64,128,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,float16,0,0.046426668763160706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,fp8,0,0.04791999856630961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,64,128,1,fp8,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,64,128,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.031157332162062328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,float16,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,64,128,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,float16,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.030154667794704437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,64,128,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.030133334298928578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,64,128,1,fp8,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,float16,0,0.03089066594839096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,64,128,1,fp8,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,64,128,1,fp8,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,64,128,1,fp8,fp8,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,64,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,64,128,1,fp8,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.016602666427691776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,64,128,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,64,128,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,float16,0,0.11764267086982727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.11737066507339478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.1172160009543101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,fp8,0,0.11755200227101643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,64,128,1,fp8,fp8,0,0.1092693308989207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.10925867160161336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,64,128,1,fp8,fp8,0,0.11015466849009196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,float16,0,0.11774933338165283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.11762133240699768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,fp8,0,0.11734400192896526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.11748266220092773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.10941333572069804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,float16,0,0.11748799681663513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.11917333801587422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,fp8,0,0.1193386713663737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,64,128,1,fp8,fp8,0,0.11211733023325603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.11758933464686076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.11283199985822041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,float16,0,0.11923733353614807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.1190720001856486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,fp8,0,0.11754666765530904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,64,128,1,fp8,fp8,0,0.11249599854151408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.11857600013415019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.11351466178894043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,float16,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.06748799979686737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,fp8,0,0.06811733543872833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,64,128,1,fp8,fp8,0,0.0662613312403361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.06613866488138835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.06577600042025249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,float16,0,0.06598933537801106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,fp8,0,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.06621333460013072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,64,128,1,fp8,fp8,0,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.06617600222428639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.06256533165772755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,float16,0,0.06629866858323415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,64,128,1,fp8,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,float16,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.06614933411280315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,fp8,0,0.06658666829268138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,64,128,1,fp8,fp8,0,0.06421866516272227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,64,128,1,fp8,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.06782933572928111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.06622399886449178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.06434666613737743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,float16,0,0.0662666658560435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.06743466854095459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.06404800216356914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,float16,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,64,128,1,fp8,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,64,128,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,float16,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.03919466584920883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,fp8,0,0.03893866638342539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,64,128,1,fp8,fp8,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,float16,0,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,float16,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,64,128,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,float16,0,0.03932266682386398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,64,128,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,64,128,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,64,128,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,64,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,float16,0,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,fp8,0,0.026250667870044708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,64,128,1,fp8,fp8,0,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,64,128,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.026421333352724712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,64,128,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,float16,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,64,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,float16,0,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,64,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,64,128,1,fp8,fp8,0,0.09915199875831604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,float16,0,0.1053493320941925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.105295995871226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,fp8,0,0.1050879955291748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.09884799520174663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.10494933525721233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,float16,0,0.1050933301448822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.1048906644185384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,fp8,0,0.10525866349538167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,fp8,0,0.1048479974269867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,64,128,1,fp8,fp8,0,0.09910399715105693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.0988213320573171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,float16,0,0.1058240036169688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.10564800103505452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,64,128,1,fp8,fp8,0,0.10014933347702026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,fp8,0,0.10563733180363973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,64,128,1,fp8,fp8,0,0.10148266951243083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.10492266217867534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.10078932841618855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,float16,0,0.10506666700045268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.1053600013256073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.10681600371996562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,float16,0,0.05825600028038025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.10060266653696696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.05805333455403646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.057989334066708885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,64,128,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.0562720000743866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,float16,0,0.05778666834036509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,float16,0,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,64,128,1,fp8,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.058602665861447654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.05681066711743673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.057999998331069946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,fp8,0,0.05957333246866862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,64,128,1,fp8,fp8,0,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.05985066791375478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,float16,0,0.05982399980227152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,fp8,0,0.058277333776156105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,64,128,1,fp8,fp8,0,0.05751466751098633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.05751466751098633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,float16,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.05823466678460439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,fp8,0,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,64,128,1,fp8,fp8,0,0.05589866638183594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.0594400018453598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,64,128,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,float16,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,float16,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,64,128,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,fp8,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,64,128,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,64,128,1,fp8,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,float16,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,float16,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,64,128,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,64,128,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,64,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,64,128,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,float16,0,0.024864000578721363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,64,128,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,64,128,1,fp8,fp8,0,0.02402133246262868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,float16,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,64,128,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,float16,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,float16,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,float16,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,64,128,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,64,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,64,128,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,64,128,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,64,128,1,fp8,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,fp8,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,float16,0,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,float16,0,0.7279040018717448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,fp8,0,0.7338933149973551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,64,128,1,fp8,fp8,0,0.6659733454386393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,float16,0,0.7427946726481119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,fp8,0,0.7470506827036539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,64,128,1,fp8,fp8,0,0.6790613333384196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,float16,0,0.7600639661153158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,4.400431950887044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,4.0520585378011065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,fp8,0,0.7671199639638265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,4.396335919698079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,4.41267744700114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,64,128,1,fp8,fp8,0,0.7012639840443929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,4.418272018432617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,float16,0,0.4285920063654582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,4.0713653564453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,fp8,0,0.438373327255249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,64,128,1,fp8,fp8,0,0.4079413414001465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,4.430335998535156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,float16,0,0.38550400733947754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,fp8,0,0.3887733221054077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,2.330575942993164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,64,128,1,fp8,fp8,0,0.35631998380025226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,2.33461332321167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,2.159322738647461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,4.0824534098307295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,4.4358774820963545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,float16,0,0.38817067941029865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,2.2791573206583657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,fp8,0,0.3933440049489339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,64,128,1,fp8,fp8,0,0.361786683400472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,2.277754624684652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,float16,0,0.39829333623250324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,2.103951930999756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,fp8,0,0.4041866858800252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,2.2895946502685547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,64,128,1,fp8,fp8,0,0.36977068583170575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,float16,0,0.24221332867940268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,2.285125255584717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,2.1090505917867026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,fp8,0,0.24841066201527914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,2.294933319091797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,64,128,1,fp8,fp8,0,0.23400533199310303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,1.248410701751709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,float16,0,0.21892799933751425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,2.298346678415934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,2.118933359781901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,fp8,0,0.22104533513387045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,64,128,1,fp8,fp8,0,0.2059626579284668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,1.2537813186645508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,1.159610668818156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,1.2224746545155842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,float16,0,0.2209706703821818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,fp8,0,0.22378667195638022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,64,128,1,fp8,fp8,0,0.2075093388557434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,1.2202773094177246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,1.1303679943084717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,1.2210079828898113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,float16,0,0.22817067305246988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,fp8,0,0.23025067647298178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,64,128,1,fp8,fp8,0,0.21363733212153116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,1.2236693700154622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,1.1339733600616455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,float16,0,0.16849599281946817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,1.2285652955373128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,fp8,0,0.1680799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,64,128,1,fp8,fp8,0,0.15813333789507547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,1.2301920255025227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,0.7294987042744955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,1.1380159854888916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,float16,0,0.1646666626135508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,0.7262826760609945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,0.6706346670786539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,fp8,0,0.16429332892100015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,64,128,1,fp8,fp8,0,0.15435199936230978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,0.7239573001861572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,float16,0,0.16495999693870544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,0.7190720240275065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,0.6677280267079672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,fp8,0,0.16394666830698648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,64,128,1,fp8,fp8,0,0.15449066956837973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,0.7228426933288574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,float16,0,0.164383997519811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,0.6680693626403809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,fp8,0,0.16590933005015054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,0.7237652937571207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,64,128,1,fp8,fp8,0,0.15638400117556253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,0.7221600214640299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,0.7238132953643799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,0.6671520074208578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,float16,0,0.5481866598129272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,64,128,1,fp8,fp8,0,0.5006986856460571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,fp8,0,0.5546186765034994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,float16,0,0.5567413171132406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,fp8,0,0.5619466702143351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,64,128,1,fp8,fp8,0,0.5102666616439819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,2.608586629231771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,float16,0,0.5697173277537028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,2.402799924214681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,2.6187413533528647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,2.6162400245666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,fp8,0,0.5759040117263794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,64,128,1,fp8,fp8,0,0.5271733204523722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,2.6294933954874673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,float16,0,0.3261599938074748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,2.4156959851582847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,fp8,0,0.33427735169728595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,64,128,1,fp8,fp8,0,0.3101759950319926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,2.6367732683817544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,float16,0,0.29124265909194946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,1.405450661977132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,fp8,0,0.29370667537053424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,2.64466126759847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,1.411333401997884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,2.43340794245402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,64,128,1,fp8,fp8,0,0.27084267139434814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,1.3051946957906086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,float16,0,0.2953546643257141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,1.3714027404785156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,fp8,0,0.2987839976946513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,64,128,1,fp8,fp8,0,0.2765493392944336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,1.3687733014424641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,1.2649386723836262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,float16,0,0.30364267031351727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,1.3813546498616536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,1.374597390492757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,1.270800034205119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,1.385189374287923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,fp8,0,0.30778666337331134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,64,128,1,fp8,fp8,0,0.28268800179163617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,1.3761812845865886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,0.7698346773783366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,float16,0,0.18477867046991983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,fp8,0,0.1890559991200765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,1.2797813415527344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,64,128,1,fp8,fp8,0,0.17867734034856161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,float16,0,0.16225066781044006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,0.7719306945800781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,fp8,0,0.1643946667512258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,0.7141813437143961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,64,128,1,fp8,fp8,0,0.15406399965286255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,0.745855967203776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,float16,0,0.16460266709327698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,0.7474613189697266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,0.6914453506469727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,fp8,0,0.16730666160583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,64,128,1,fp8,fp8,0,0.15679466724395752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,0.7463306585947672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,0.6939733028411865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,float16,0,0.17022399107615152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,0.745685338973999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,fp8,0,0.17271467049916586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,64,128,1,fp8,fp8,0,0.16251200437545776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,0.7536373138427734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,float16,0,0.12351466218630473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,0.7536213397979736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,fp8,0,0.12309333682060242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,0.6981226603190104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,0.46057601769765216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,64,128,1,fp8,fp8,0,0.11776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,0.46166932582855225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,float16,0,0.12379200259844463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,0.4280213514963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,fp8,0,0.12327999869982402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,0.4580693244934082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,64,128,1,fp8,fp8,0,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,0.46092267831166583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,float16,0,0.12337600191434224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,0.4243466854095459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,fp8,0,0.1236799955368042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,0.45978132883707684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,64,128,1,fp8,fp8,0,0.11678933103879292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,0.45784000555674237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,float16,0,0.12230933705965678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,0.42607466379801434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,fp8,0,0.12360533078511556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,64,128,1,fp8,fp8,0,0.11714133620262146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,0.4578666687011719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,0.45850133895874023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,0.4245866537094116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,float16,0,0.4552053213119507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,fp8,0,0.46003735065460205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,64,128,1,fp8,fp8,0,0.4163999954859416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,float16,0,0.4623146851857503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,fp8,0,0.4694453477859497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,1.8955146471659343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,64,128,1,fp8,fp8,0,0.4256800015767415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,1.8988587061564128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,1.747920036315918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,float16,0,0.4736479918162028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,1.895898660024007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,fp8,0,0.4790240128835042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,64,128,1,fp8,fp8,0,0.437605341275533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,1.751738707224528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,1.9053653081258137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,float16,0,0.27273066838582355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,fp8,0,0.2794026732444763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,64,128,1,fp8,fp8,0,0.2605920036633809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,1.9127999941507976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,1.0341440041859944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,float16,0,0.24050132433573404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,1.9216267267862956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,1.7649280230204265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,1.0364320278167725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,fp8,0,0.2442506750424703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,0.9586986700693766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,64,128,1,fp8,fp8,0,0.22590933243433634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,0.9983146985371908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,float16,0,0.24737600485483804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,0.9999679724375407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,fp8,0,0.24819733699162802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,0.9261013666788737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,64,128,1,fp8,fp8,0,0.23024000724156699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,1.0025866826375325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,float16,0,0.2510346571604411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,1.0050880114237468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,fp8,0,0.2557599941889445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,0.928335984547933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,64,128,1,fp8,fp8,0,0.23804799715677896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,1.0102612972259521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,fp8,0,0.16074666380882263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,0.9345066547393799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,float16,0,0.1588213344415029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,1.0120000044504802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,64,128,1,fp8,fp8,0,0.15340800086657205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,0.5724746783574423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,float16,0,0.1394773324330648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,0.5756693283716837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,0.5329440037409464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,fp8,0,0.14040000240008035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,0.5509920120239258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,64,128,1,fp8,fp8,0,0.12965333461761475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,float16,0,0.1399999956289927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,0.5522933403650919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,0.5111306508382162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,fp8,0,0.1418560047944387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,0.5527840058008829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,64,128,1,fp8,fp8,0,0.13557333747545877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,float16,0,0.14588800072669983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,0.5539999802907308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,0.5102826754252116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,fp8,0,0.14680000146230063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,64,128,1,fp8,fp8,0,0.13868799805641174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,0.5577226479848226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,0.5610186656316122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,float16,0,0.11102933684984843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,0.5219626824061075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,fp8,0,0.10975466171900432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,0.35123201211293537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,64,128,1,fp8,fp8,0,0.10317867000897725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,0.3489866654078166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,0.3265226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,float16,0,0.10912000139554341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,fp8,0,0.11027733484903972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,0.3499946594238281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,64,128,1,fp8,fp8,0,0.1030399998029073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,0.3487893342971802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,0.3490560054779053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,float16,0,0.1092800001303355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,0.3250826597213745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,fp8,0,0.11003733674685161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,64,128,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,0.348688006401062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,64,128,1,fp8,fp8,0,0.10310932993888855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,float16,0,0.10943466424942017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,0.3242879907290141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,fp8,0,0.1092693308989207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,0.3512586752573649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,0.34958934783935547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,0.3238453269004822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,float16,0,0.7115093072255453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,fp8,0,0.7157013416290283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,64,128,1,fp8,fp8,0,0.6480533281962076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,float16,0,0.7215840021769205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,fp8,0,0.7284159660339355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,64,128,1,fp8,fp8,0,0.6629279851913452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,2.491999944051107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,2.4992106755574546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,2.2944053014119468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,float16,0,0.7432906627655029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,2.5048747062683105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,fp8,0,0.7485600312550863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,64,128,1,fp8,fp8,0,0.6837333043416342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,float16,0,0.4126720031102498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,2.5082133611043296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,2.3122612635294595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,fp8,0,0.42016534010569256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,2.529354731241862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,64,128,1,fp8,fp8,0,0.3915199836095174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,1.339253266652425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,float16,0,0.36699732144673664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,2.536933263142904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,2.3335787455240884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,1.244762659072876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,1.346757411956787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,fp8,0,0.37098666032155353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,64,128,1,fp8,fp8,0,0.33854933579762775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,float16,0,0.3718453248341878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,1.2873067061106365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,fp8,0,0.3755520184834798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,1.290453354517619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,64,128,1,fp8,fp8,0,0.3431253433227539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,1.1876266797383626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,1.2954933643341064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,float16,0,0.3826719919840495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,fp8,0,0.3847893476486206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,64,128,1,fp8,fp8,0,0.3529493411382039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,1.2980639934539795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,1.194816033045451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,float16,0,0.22573866446812949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,1.3026453653971355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,fp8,0,0.22894400358200073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,0.714629332224528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,64,128,1,fp8,fp8,0,0.21466666460037231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,1.309391975402832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,1.2056372960408528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,float16,0,0.19715199867884317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,0.7207732995351156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,0.6650293270746866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,fp8,0,0.19922133286794028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,float16,0,0.19924267133076987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,64,128,1,fp8,fp8,0,0.1871253252029419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,0.6850293477376302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,0.6872479915618896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,0.6371093193689982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,fp8,0,0.20216532548268637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,64,128,1,fp8,fp8,0,0.18925867478052774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,0.6875893274943033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,float16,0,0.20772266387939453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,0.6880640188852946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,0.6386293172836304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,fp8,0,0.20949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,float16,0,0.13126400113105774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,64,128,1,fp8,fp8,0,0.19469332695007324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,0.6956533590952555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,0.6976586977640787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,0.6465546687444051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,fp8,0,0.1328213314215342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,0.4004853169123332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,64,128,1,fp8,fp8,0,0.12745599945386252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,0.4076480070749919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,float16,0,0.11560533444086711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,0.3766080141067505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,0.38636799653371173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,fp8,0,0.11744532982508342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,0.3874400059382121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,64,128,1,fp8,fp8,0,0.10937066872914632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,64,128,1,fp8,fp8,0,0.1077280044555664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,float16,0,0.11734933654467265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,0.35814400513966876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,fp8,0,0.11929066975911458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,0.3877813418706258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,0.3885440031687419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,float16,0,0.11947199702262878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,0.35995201269785565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,fp8,0,0.12126933534940083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,0.389850656191508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,64,128,1,fp8,fp8,0,0.11430399616559346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.2528266708056132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,0.39156798521677655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,float16,0,0.08859733740488689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.25249600410461426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,0.36324799060821533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,fp8,0,0.08853333195050557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,fp8,0,0.0883146623770396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,64,128,1,fp8,fp8,0,0.08437333504358928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,0.23363733291625977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.25250667333602905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,float16,0,0.0888266662756602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.25038933753967285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,64,128,1,fp8,fp8,0,0.08442667126655579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,0.2333973248799642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,float16,0,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.2506986657778422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,float16,0,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,fp8,0,0.09020800391832988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,64,128,1,fp8,fp8,0,0.08275199929873149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.25110934178034466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,0.2349920074144999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,fp8,0,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.2528266708056132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,64,128,1,fp8,fp8,0,0.08242133259773254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.25221866369247437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,0.23621867100397745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,float16,0,0.5348000129063925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,fp8,0,0.5401920080184937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,64,128,1,fp8,fp8,0,0.4847946564356486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,1.5225013097127278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,float16,0,0.5432106653849283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,fp8,0,0.5493599971135458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,1.5317974090576172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,1.3968480428059895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,64,128,1,fp8,fp8,0,0.4967946608861287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,float16,0,0.556165337562561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,1.5334026018778484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,fp8,0,0.5624800125757853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,1.538042704264323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,1.408522605895996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,64,128,1,fp8,fp8,0,0.512773315111796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,float16,0,0.31427733103434247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,1.5496266682942708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,fp8,0,0.3216800093650818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,64,128,1,fp8,fp8,0,0.29790933926900226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,0.8342026869455973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,1.5557972590128581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,1.4295040766398113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,float16,0,0.27723199129104614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,0.8403466542561849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,0.7749173641204834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,fp8,0,0.2794666687647502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,64,128,1,fp8,fp8,0,0.25682665904362995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,0.7944479783376058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,float16,0,0.2824853261311849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,0.7970080375671387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,0.7337866624196371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,fp8,0,0.28520532449086505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,64,128,1,fp8,fp8,0,0.26229333877563477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,0.7986400127410889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,float16,0,0.28887999057769775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,0.800976037979126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,0.7401013374328613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,fp8,0,0.2934880057970683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,64,128,1,fp8,fp8,0,0.2705013354619344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,0.8077920277913412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,float16,0,0.16863999764124551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,0.8092319965362549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,0.7488426367441813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,fp8,0,0.1731040080388387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,0.45159467061360675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,64,128,1,fp8,fp8,0,0.16397333145141602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,0.45901866753896076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,float16,0,0.147189329067866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,0.42213332653045654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,fp8,0,0.1482133368651072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,0.4279093345006307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,64,128,1,fp8,fp8,0,0.13768532872200012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,fp8,0,0.1524853308995565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,0.4305493434270223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,0.39788798491160077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,float16,0,0.15261866648991904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,0.4297706683476766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,64,128,1,fp8,fp8,0,0.14285332957903543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,fp8,0,0.1578879952430725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,0.43220265706380206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,float16,0,0.15507733821868896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,0.40005334218343097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,0.435973326365153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,64,128,1,fp8,fp8,0,0.15008533000946045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,float16,0,0.09641599655151367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,0.4376533428827922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,0.40698667367299396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.25918400287628174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,fp8,0,0.09875200192133586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,64,128,1,fp8,fp8,0,0.09492799639701843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.26262933015823364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,0.24568533897399902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,float16,0,0.08691199620564778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.24861333767573038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,fp8,0,0.08853333195050557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,64,128,1,fp8,fp8,0,0.08250133196512859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.25034133593241376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.2505706747372945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,0.23149865865707397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,float16,0,0.08680533369382222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,64,128,1,fp8,fp8,0,0.08176533381144206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,fp8,0,0.09130666653315227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.2507573366165161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,0.2320586641629537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,float16,0,0.08876799543698628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.2513013283411662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,64,128,1,fp8,fp8,0,0.0849120020866394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.25303467114766437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,0.23433067401250204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,float16,0,0.06806933383146922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.17499200503031412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,fp8,0,0.06825600067774455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,64,128,1,fp8,fp8,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.17492800951004028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.16061866283416748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,float16,0,0.06842133402824402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.17468800147374472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,fp8,0,0.06816000243028005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,64,128,1,fp8,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.17467733224232992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.16239999731381735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,float16,0,0.06770666440327962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.17482666174570718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,fp8,0,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,fp8,0,0.06830933193365733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,64,128,1,fp8,fp8,0,0.06411199768384297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.17466666301091513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.16165866454442343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.17362666130065918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,64,128,1,fp8,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.17286932468414307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.16158933440844217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,float16,0,0.7059733072916666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,fp8,0,0.7103626728057861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,64,128,1,fp8,fp8,0,0.6410880088806152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,1.5342559814453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,float16,0,0.720357338587443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,1.538256009419759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,fp8,0,0.723632017771403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,1.4025120735168457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,64,128,1,fp8,fp8,0,0.6556853453318278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,1.545621395111084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,float16,0,0.739898681640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,1.547829310099284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,1.420080025990804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,fp8,0,0.7449066638946533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,64,128,1,fp8,fp8,0,0.6801599661509196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,float16,0,0.4054986635843913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,1.5705493291219075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,fp8,0,0.4142293135325114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,64,128,1,fp8,fp8,0,0.3834346532821655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,0.8358399868011475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,1.5755839347839355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,1.441941261291504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,float16,0,0.3591253360112508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,0.8460853099822998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,0.780623992284139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,fp8,0,0.3619573513666789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,64,128,1,fp8,fp8,0,0.33018134037653607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,0.7862346967061361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,float16,0,0.3646186590194702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,0.7910719712575277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,0.7238506476084391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,fp8,0,0.36842668056488037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,64,128,1,fp8,fp8,0,0.3361813227335612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,0.7941866715749105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,0.7908373673756918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,float16,0,0.37293867270151776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,0.7297226587931315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,fp8,0,0.3773333231608073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,float16,0,0.21690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,64,128,1,fp8,fp8,0,0.34470399220784503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,0.8031626542409261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,0.7379146416982015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,0.8078827063242594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,fp8,0,0.21962666511535645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,0.4428639809290568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,64,128,1,fp8,fp8,0,0.20640534162521362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,0.44837331771850586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,float16,0,0.1871946652730306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,0.41465067863464355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,0.4133973519007365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,fp8,0,0.1902986764907837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,64,128,1,fp8,fp8,0,0.17663466930389404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,fp8,0,0.19321600596110025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,0.4182720184326172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,0.3868639866511027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,0.42130132516225177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,float16,0,0.1906773249308268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,0.4172373215357463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,64,128,1,fp8,fp8,0,0.1790026624997457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,float16,0,0.19798932472864786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,0.3891199827194214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,fp8,0,0.20129066705703735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,0.42681066195170086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,0.42367998758951825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,64,128,1,fp8,fp8,0,0.18798933426539102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,0.3940800031026204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,float16,0,0.12155733505884807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.24644800027211508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,0.23425066471099854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,fp8,0,0.12346667051315308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,64,128,1,fp8,fp8,0,0.11685867110888164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.24843200047810873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,float16,0,0.10540800293286641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.2318720022837321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,fp8,0,0.10725333293279012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,64,128,1,fp8,fp8,0,0.09780266880989075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.23229332764943442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.21187732617060342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,float16,0,0.10549333691596985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.23077332973480225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,float16,0,0.10787733395894368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,fp8,0,0.10689600308736165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.2325706680615743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,64,128,1,fp8,fp8,0,0.0990666647752126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.23415466149648032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.2139093279838562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,64,128,1,fp8,fp8,0,0.10285866260528564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.1466506620248159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.23594667514165243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.21975467602411905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,float16,0,0.06906133393446605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,fp8,0,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,64,128,1,fp8,fp8,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.14751999576886496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,float16,0,0.06763199965159099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.1381119986375173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.14206399520238241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,fp8,0,0.06638399759928386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,64,128,1,fp8,fp8,0,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.14199466506640115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.1302079955736796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,float16,0,0.06599466502666473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,64,128,1,fp8,fp8,0,0.06195199986298879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.14147733648618063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.1316106617450714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,float16,0,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.14194132884343466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,fp8,0,0.06797866523265839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,fp8,0,0.055888002117474876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,64,128,1,fp8,fp8,0,0.06201066573460897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.14387200276056925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.09602666894594829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.13169067104657492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,float16,0,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.1036959985891978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,64,128,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,64,128,1,fp8,fp8,0,0.04994666576385498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.10429867108662923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,float16,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.10363733768463135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.10458133618036906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.09604266285896301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.09679999947547913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,float16,0,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.1030506690343221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,fp8,0,0.0544106662273407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,64,128,1,fp8,fp8,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,64,128,1,fp8,fp8,0,0.051701332132021584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.10462933778762817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,float16,0,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.10355200370152791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,fp8,0,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.10363200306892395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.09545066952705383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,float16,0,0.5294506549835205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,fp8,0,0.5366506576538086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,64,128,1,fp8,fp8,0,0.4820053180058797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,0.9751359621683756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,float16,0,0.5373813311258951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,0.8917013009389242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,0.9791893164316813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,fp8,0,0.543066660563151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,0.9778827031453451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,64,128,1,fp8,fp8,0,0.49114131927490234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,float16,0,0.5511039892832438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,0.9841386477152506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,0.9040213425954183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,fp8,0,0.5571306546529134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,64,128,1,fp8,fp8,0,0.5066986481348673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,0.995194673538208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,float16,0,0.30987199147542316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,fp8,0,0.31563733021418255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,0.9977066516876221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,0.5427306493123373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,64,128,1,fp8,fp8,0,0.2929226756095886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,0.5082826614379883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,0.9174453417460123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,0.5024480024973551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,0.5499840180079142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,float16,0,0.2715146740277608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,fp8,0,0.273909330368042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,64,128,1,fp8,fp8,0,0.25337066253026325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,0.5063360134760538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,float16,0,0.2748746673266093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,0.4649173418680827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,0.5071733395258585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,fp8,0,0.27897600332895917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,0.4715253512064616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,64,128,1,fp8,fp8,0,0.25704000393549603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,0.5093439817428589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,float16,0,0.2832213242848714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,fp8,0,0.2872320016225179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,0.514789342880249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,64,128,1,fp8,fp8,0,0.26295467217763263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,0.2895626624425252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,0.519594669342041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,float16,0,0.16340800126393637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,0.29493866364161175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,0.4780160188674927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,fp8,0,0.1679626703262329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,64,128,1,fp8,fp8,0,0.15683199961980185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,0.2738666733105977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,float16,0,0.1402346690495809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.2679520050684611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,fp8,0,0.1431839962800344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,64,128,1,fp8,fp8,0,0.1322826643784841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.26889065901438397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,0.25006399552027386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,float16,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.26910400390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,fp8,0,0.14589866995811462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,64,128,1,fp8,fp8,0,0.1372160017490387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.2728426655133565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,0.2532106637954712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,float16,0,0.14855999747912088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,float16,0,0.08881066242853801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.27532800038655597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,fp8,0,0.15108266472816467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,64,128,1,fp8,fp8,0,0.14202666282653809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.27879999081293744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,0.25913600126902264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.1641706625620524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,fp8,0,0.09150399764378865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,fp8,0,0.08070933322111766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,64,128,1,fp8,fp8,0,0.08903466661771138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.1683680017789205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.15650666753451029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.15678399801254272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,float16,0,0.0800906668106715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.15599466363588968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,64,128,1,fp8,fp8,0,0.0738560010989507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.142085333665212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,float16,0,0.08035733302434285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.15716266632080078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,64,128,1,fp8,fp8,0,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.15827733278274536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.14404267072677612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,float16,0,0.08246933420499165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.15712533394495645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.10379733641942342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,fp8,0,0.08283199866612752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,64,128,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,64,128,1,fp8,fp8,0,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.15987199544906616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.14446933070818582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.09949333469072978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,fp8,0,0.051701332132021584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,float16,0,0.05337599913279215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,fp8,0,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.10090133547782898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.09857066472371419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,float16,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,64,128,1,fp8,fp8,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.09380267063776652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,fp8,0,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,64,128,1,fp8,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.1009333332379659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.0937600036462148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,float16,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,fp8,0,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,64,128,1,fp8,fp8,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.09479467074076335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.06414933502674103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,float16,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.06851199766000111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.06810666620731354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,64,128,1,fp8,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.07017600039641063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,64,128,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.06465599934260051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,float16,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.07026133437951405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.07086400190989177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,64,128,1,fp8,fp8,0,0.037733333806196846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.06823466718196869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,fp8,0,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,64,128,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.06870399912198384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.06469866633415222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,float16,0,0.7300319671630859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,fp8,0,0.7317279974619547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,64,128,1,fp8,fp8,0,0.6523840030034384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,1.0732800165812175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,1.0731946627298992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,float16,0,0.7458293437957764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,0.9724213282267252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,fp8,0,0.746064027150472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,1.0917279720306396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,64,128,1,fp8,fp8,0,0.6603253285090128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,1.0861226717631023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,float16,0,0.7630080382029215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,0.9809973239898682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,fp8,0,0.7637653350830078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,64,128,1,fp8,fp8,0,0.685157299041748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,float16,0,0.4147253433863322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,1.1089173158009846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,1.108021338780721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,1.0055572986602783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,0.5961759885152181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,fp8,0,0.41705067952473956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,64,128,1,fp8,fp8,0,0.38709867000579834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,0.5963306824366251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,0.5525973240534464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,float16,0,0.3621866703033447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,0.5391253232955933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,0.5409813324610392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,fp8,0,0.36434133847554523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,64,128,1,fp8,fp8,0,0.3309920032819112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,0.4950559933980306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,float16,0,0.3662559986114502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,fp8,0,0.3694133361180623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,float16,0,0.3789920012156169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,0.5435680150985718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,64,128,1,fp8,fp8,0,0.3378400007883708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,0.5470826625823975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,0.5032480160395304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,0.557861328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,fp8,0,0.3805919885635376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,64,128,1,fp8,fp8,0,0.3475786844889323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,0.5598239898681641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,64,128,1,fp8,fp8,0,0.20498667160669962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,float16,0,0.2156053384145101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,0.3136639992396037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,float16,0,0.1861120065053304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.27878934144973755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,0.5142346620559692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,64,128,1,fp8,fp8,0,0.17488000790278116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,0.31063467264175415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,fp8,0,0.21761600176493326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,0.29054399331410724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,fp8,0,0.1869600017865499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.2813066641489665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,0.2619199951489766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,float16,0,0.18902933597564697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,0.2655893365542094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.2814026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,fp8,0,0.19059733549753824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,64,128,1,fp8,fp8,0,0.1788426637649536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.28282666206359863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,float16,0,0.1967946688334147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,0.2725013295809428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.29026132822036743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,fp8,0,0.19822933276494345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,64,128,1,fp8,fp8,0,0.185205340385437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.29156267642974854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.16192533572514853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,float16,0,0.11744532982508342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.16876266400019327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,fp8,0,0.10223999619483948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,fp8,0,0.11889066298802693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,64,128,1,fp8,fp8,0,0.11334932843844096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.15412799517313638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.17034665743509927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,float16,0,0.10085333387056987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.15108799934387207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,64,128,1,fp8,fp8,0,0.09436266620953877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.1402239998181661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,float16,0,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,fp8,0,0.10452266534169515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.15416000286738077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,64,128,1,fp8,fp8,0,0.09492799639701843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.15450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.14192533493041992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,float16,0,0.10501333077748616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.15504533052444458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,fp8,0,0.10727999607721965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,64,128,1,fp8,fp8,0,0.09907199939092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.15741866827011108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.1476800044377645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.09891733527183533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,float16,0,0.06404800216356914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.09758399923642476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,fp8,0,0.0676746666431427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,64,128,1,fp8,fp8,0,0.06293866535027821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.09317866961161296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,float16,0,0.059989333152770996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.09303999940554301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,64,128,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,fp8,0,0.061946665247281395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.09480533003807068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.08583999673525493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,float16,0,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.09476799766222636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,64,128,1,fp8,fp8,0,0.057258665561676025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,float16,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.09442666172981262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,64,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.09599467118581136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.0885706643263499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.0620000014702479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,float16,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.06632533172766368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,64,128,1,fp8,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,64,128,1,fp8,fp8,0,0.042410666743914284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,float16,0,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.06498666604359944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.06574399769306183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,float16,0,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,fp8,0,0.04348266621430715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,64,128,1,fp8,fp8,0,0.0413973331451416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.05997333427270254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,float16,0,0.043738668163617454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,fp8,0,0.04483200112978617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,64,128,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.06605866551399231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,float16,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,64,128,1,fp8,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,float16,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.047781333327293396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,float16,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,float16,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,64,128,1,fp8,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,64,128,1,fp8,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.047637333472569786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,float16,0,0.5259999831517538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,0.6980053583780924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,fp8,0,0.5338186820348104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,64,128,1,fp8,fp8,0,0.4806400140126546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,0.704469362894694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,0.7062826951344808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,0.6416373252868652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,float16,0,0.5360159873962402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,fp8,0,0.5426453351974487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,64,128,1,fp8,fp8,0,0.49249064922332764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,0.7141493161519369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,0.6502346595128378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,float16,0,0.5489973227183024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,fp8,0,0.556165337562561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,0.718010663986206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,0.7290666898091634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,64,128,1,fp8,fp8,0,0.5045280059178671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,float16,0,0.3080693284670512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,0.6653493245442709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,0.398362676302592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,fp8,0,0.31596267223358154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,float16,0,0.27135999997456867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,64,128,1,fp8,fp8,0,0.2943306763966878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,0.4076266686121623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,0.3783200184504191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,fp8,0,0.2728853424390157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,0.35842665036519367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,64,128,1,fp8,fp8,0,0.2502346634864807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,0.360645333925883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,0.33352001508076984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,float16,0,0.2735733389854431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,0.35995733737945557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,fp8,0,0.2773653268814087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,64,128,1,fp8,fp8,0,0.25649066766103107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,0.36557332674662274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,0.3383893171946208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,float16,0,0.28170132637023926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,0.36901867389678955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,fp8,0,0.28657599290211994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,64,128,1,fp8,fp8,0,0.2629599968592326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,0.37780265013376874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,0.3467093308766683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,float16,0,0.16230400403340658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.2165279984474182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.21267733971277872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,fp8,0,0.16647467017173767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,64,128,1,fp8,fp8,0,0.15779733657836914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,64,128,1,fp8,fp8,0,0.13119999567667642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.20333333810170492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,float16,0,0.13985066612561545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.17600532372792563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.18583466609319052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,fp8,0,0.14221333463986716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.1885546644528707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,float16,0,0.14146133263905844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.1898933251698812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,fp8,0,0.14381333192189535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,64,128,1,fp8,fp8,0,0.1353493332862854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.19257599115371704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.1800853411356608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,float16,0,0.14620266358057657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.194106658299764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,fp8,0,0.14959999918937683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,64,128,1,fp8,fp8,0,0.13962133725484213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.19713066021601358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.18606932957967123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,float16,0,0.08647466699282329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.11706133683522542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,fp8,0,0.0886293351650238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,64,128,1,fp8,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,64,128,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.1193333367506663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.1144480009873708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,float16,0,0.07627200086911519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.10805867115656535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,fp8,0,0.07828266421953838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,fp8,0,0.07735466460386912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.1092746655146281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.09911466638247173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,float16,0,0.07791466514269511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.10736533006032307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,64,128,1,fp8,fp8,0,0.07262399792671204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.10148266951243083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,float16,0,0.07836266855398814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.1090133289496104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,fp8,0,0.08063999811808269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,64,128,1,fp8,fp8,0,0.07415466507275899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.1109920044740041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.07247999807198842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.10256533821423848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,float16,0,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.07076799869537354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,fp8,0,0.05187733471393585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,64,128,1,fp8,fp8,0,0.04888000090916952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.06758933266003926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,float16,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.06846933563550313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,fp8,0,0.04716266691684723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,64,128,1,fp8,fp8,0,0.04463466505209605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.06883733471234639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.06359999875227611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,float16,0,0.04762133459250132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,64,128,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,64,128,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.06931733091672261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,fp8,0,0.04987733562787374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.06473066906134288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,float16,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,64,128,1,fp8,fp8,0,0.031845333675543465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,64,128,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,float16,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,64,128,1,fp8,fp8,0,0.031152000029881794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.045498669147491455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,64,128,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.043103997906049095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,float16,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,64,128,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,float16,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,fp8,0,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,64,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,float16,0,0.030826665461063385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,fp8,0,0.030437332888444264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,64,128,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,float16,0,0.6227253278096517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,0.7314240137736002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,fp8,0,0.619327982266744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,64,128,1,fp8,fp8,0,0.5757493178049723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,0.7293012936909994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,0.6751680374145508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,float16,0,0.6344053347905477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,0.7433333396911621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,fp8,0,0.6312373479207357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,64,128,1,fp8,fp8,0,0.6025226513544718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,0.7380586465199789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,0.7091626326243082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,float16,0,0.6386826833089193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,fp8,0,0.6345973412195841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,0.7466080188751221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,64,128,1,fp8,fp8,0,0.6063839991887411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,float16,0,0.34696535269419354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,0.4083840052286784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,0.7433386643727621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,0.7123626867930094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,64,128,1,fp8,fp8,0,0.3330293297767639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,fp8,0,0.34224534034729004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,0.4033546845118205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,0.38597333431243896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,float16,0,0.31725867589314777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,0.3741226593653361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,0.34789331754048664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,fp8,0,0.31809600194295246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,64,128,1,fp8,fp8,0,0.29504533608754474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,0.3738986651102702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,64,128,1,fp8,fp8,0,0.3093600074450175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,float16,0,0.3242186705271403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,0.3792639970779419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,fp8,0,0.32225600878397626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,0.3796000083287557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,0.363322655359904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,float16,0,0.3280959924062093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,0.3836373488108317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,fp8,0,0.32822932799657184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,64,128,1,fp8,fp8,0,0.3129813273747762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,0.38023467858632404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,64,128,1,fp8,fp8,0,0.17672000328699747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,float16,0,0.18231467405954996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,0.36498133341471356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.2145973245302836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,fp8,0,0.18071999152501425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.21327465772628784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,0.20576000213623047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,float16,0,0.16744534174601236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.19684799512227377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,fp8,0,0.16643733779589334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.19917333126068115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,64,128,1,fp8,fp8,0,0.15620799859364828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.1978613336881002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.18476267655690512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,float16,0,0.17176000277201334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,fp8,0,0.17018133401870728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,64,128,1,fp8,fp8,0,0.16156799594561258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.19978133837381998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.18927999337514242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,float16,0,0.1725119948387146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.20142932732899985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,float16,0,0.10153067111968994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.19475199778874716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,fp8,0,0.1707520087560018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,64,128,1,fp8,fp8,0,0.16571199893951416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.20051199197769165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.11771200100580852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,fp8,0,0.10071466366449992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,64,128,1,fp8,fp8,0,0.10053333640098572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.11715733011563619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.11585600177447002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,float16,0,0.09302933017412822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.10910399754842122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,fp8,0,0.09295466542243958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,64,128,1,fp8,fp8,0,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.10081066687901814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,float16,0,0.09499200185139973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.1104159951210022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,fp8,0,0.09325866897900899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,64,128,1,fp8,fp8,0,0.08720533053080241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.10905599594116211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,64,128,1,fp8,fp8,0,0.08886933326721191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.10256000359853108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,float16,0,0.09500799576441447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.11075733105341594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,fp8,0,0.09340799848238628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.10593066612879436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,float16,0,0.05782933533191681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.0682826687892278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,fp8,0,0.05644799768924713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,64,128,1,fp8,fp8,0,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.06651733318964641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.06637866795063019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.06446933249632518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,fp8,0,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,64,128,1,fp8,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.06165866553783417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,float16,0,0.05606933434804281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,64,128,1,fp8,fp8,0,0.05338666836420695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.06610666712125142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,64,128,1,fp8,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.06598400076230367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,float16,0,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,fp8,0,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.06261866788069408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.04215999941031138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,float16,0,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,64,128,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,float16,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,64,128,1,fp8,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,64,128,1,fp8,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.04516266783078512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.043749332427978516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,64,128,1,fp8,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,64,128,1,fp8,fp8,0,0.02607999990383784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.032799998919169106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,64,128,1,fp8,fp8,0,0.02666666607062022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.032218667368094124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,fp8,0,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.027674667537212372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,64,128,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,64,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,float16,0,0.6059039831161499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,0.6116693417231241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,fp8,0,0.603653351465861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,64,128,1,fp8,fp8,0,0.5575199921925863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,0.6122293472290039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,0.5655573209126791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,float16,0,0.6177279949188232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,0.621125340461731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,fp8,0,0.6148106654485067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,64,128,1,fp8,fp8,0,0.5888053178787231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,0.6220000187555949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,0.59497598807017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,float16,0,0.621237317721049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,fp8,0,0.61735467116038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,64,128,1,fp8,fp8,0,0.59225066502889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,0.6269226471583048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,float16,0,0.3401333491007487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,0.6233599980672201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,0.34353065490722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,0.6001973152160645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,fp8,0,0.3332479993502299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,64,128,1,fp8,fp8,0,0.32586665948232013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.3131626645723979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,0.3371093273162842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,0.3309280077616374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,float16,0,0.3104426662127177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,fp8,0,0.3091573317845662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,64,128,1,fp8,fp8,0,0.2861119906107585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.3113706707954407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.2900586724281311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,float16,0,0.31513599554697674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.31717334191004437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.3168960014979045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,fp8,0,0.313914676507314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,64,128,1,fp8,fp8,0,0.3012426694234212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,0.3047413428624471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,float16,0,0.31731200218200684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,0.3224160075187683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,fp8,0,0.31625600655873615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,64,128,1,fp8,fp8,0,0.3039360046386719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,0.31961599985758465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,float16,0,0.17787200212478638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,0.3085493246714274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.18105600277582803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,float16,0,0.16359999775886536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,fp8,0,0.17537599802017212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,64,128,1,fp8,fp8,0,0.17324266831080118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.17884800831476846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.17603200674057007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.1646719972292582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,fp8,0,0.1622773309548696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,64,128,1,fp8,fp8,0,0.15238933761914572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.16521599888801575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.15454933047294617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,float16,0,0.1664426624774933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.16818133989969888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,fp8,0,0.16453333695729574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,64,128,1,fp8,fp8,0,0.15742400288581848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.16643733779589334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.15916799505551657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.1684266726175944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,float16,0,0.1675999959309896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.16922134160995483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,fp8,0,0.16774400075276694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,64,128,1,fp8,fp8,0,0.16056000192960104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.16498133540153503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,float16,0,0.09873599807421367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.0995199978351593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.09141866366068523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,fp8,0,0.09832533200581868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,64,128,1,fp8,fp8,0,0.09816533327102661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,64,128,1,fp8,fp8,0,0.08262933293978374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.09990933537483215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.09874133268992107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,float16,0,0.09057600299517314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.09264533718427022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,fp8,0,0.0902453362941742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.0925600032011668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.08424533406893413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.0862559974193573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,float16,0,0.09103999535242717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.09266133109728496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,fp8,0,0.09139200051625569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,64,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.09462933739026387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.09005332986513774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,float16,0,0.09304533402125041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,fp8,0,0.0929813285668691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,64,128,1,fp8,fp8,0,0.08720533053080241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,64,128,1,fp8,fp8,0,0.05296533306439718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.09224533041318257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,float16,0,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,fp8,0,0.055888002117474876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.055013333757718406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,float16,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.0537013312180837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,fp8,0,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,64,128,1,fp8,fp8,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.05420266588528951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,float16,0,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,fp8,0,0.05417066812515259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,64,128,1,fp8,fp8,0,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.05237866441408793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,float16,0,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,fp8,0,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,64,128,1,fp8,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,64,128,1,fp8,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,float16,0,0.03706666578849157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,64,128,1,fp8,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,float16,0,0.03603200117746989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,64,128,1,fp8,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.034330666065216064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,float16,0,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,64,128,1,fp8,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.03640533238649368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,64,128,1,fp8,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,64,128,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,float16,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,64,128,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.026176000634829204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,float16,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.02254933367172877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,64,128,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.022298666338125866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,fp8,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,64,128,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,float16,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,64,128,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,fp8,0,0.021909333765506744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,64,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,float16,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,64,128,1,fp8,fp8,0,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.27910399436950684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.27781333525975543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.2553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,float16,0,0.2893279989560445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.2863626678784688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,fp8,0,0.2877013285954793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,float16,0,0.28411734104156494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,fp8,0,0.28205867608388263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,64,128,1,fp8,fp8,0,0.2595786650975545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,64,128,1,fp8,fp8,0,0.2738933364550273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.2832533319791158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,fp8,0,0.2906240026156108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.27155200640360516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,float16,0,0.2919680078824361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,0.28545600175857544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,64,128,1,fp8,fp8,0,0.27665066719055176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,fp8,0,0.16269866625467935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,0.284223993619283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,float16,0,0.16506666938463846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.15760533014933267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,float16,0,0.15041599671045938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.14825066924095154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,0.27164800961812335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.16261866688728333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.14723199605941772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,64,128,1,fp8,fp8,0,0.16050666570663452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.15824000040690103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,fp8,0,0.15035733580589294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,fp8,0,0.15312000115712485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,64,128,1,fp8,fp8,0,0.13929067055384317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.14833066860834757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.13587733109792074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,float16,0,0.15405866503715515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.15212800105412802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.15036267042160034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,64,128,1,fp8,fp8,0,0.14497066537539163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.14177067081133524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,float16,0,0.1551359991232554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.09303999940554301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,fp8,0,0.1534986694653829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,64,128,1,fp8,fp8,0,0.14789866407712302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,64,128,1,fp8,fp8,0,0.09237333138783772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.15029866496721903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.1452959974606832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,fp8,0,0.08542933066685994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,float16,0,0.09296533465385437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,fp8,0,0.0928000013033549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.0904906690120697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,float16,0,0.08500799536705017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.08447999755541484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,64,128,1,fp8,fp8,0,0.07696533203125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,64,128,1,fp8,fp8,0,0.07885866860548656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.08376000324885051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.07830399771531422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,float16,0,0.08676266670227051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.07597333192825317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,float16,0,0.08601066470146179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.08505066235860188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,fp8,0,0.08672533432642619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.08406933148701985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.0844693382581075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,float16,0,0.05217599868774414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,fp8,0,0.08687999844551086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,fp8,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,64,128,1,fp8,fp8,0,0.051088000337282814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,64,128,1,fp8,fp8,0,0.08353066444396973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.08438400427500407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.08084799846013387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.05036800106366476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.05003199974695841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,float16,0,0.05046399931112925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,float16,0,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.049509331583976746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,64,128,1,fp8,fp8,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.04665066798528036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.0496373325586319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,64,128,1,fp8,fp8,0,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.04930666585763296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.04935466746489207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,float16,0,0.05093866586685181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.049551998575528465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,fp8,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,64,128,1,fp8,fp8,0,0.04776533444722494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,float16,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,fp8,0,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,64,128,1,fp8,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,64,128,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.0329066663980484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,float16,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,64,128,1,fp8,fp8,0,0.03289066751797994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,float16,0,0.034314667185147606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,64,128,1,fp8,fp8,0,0.03429866582155228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,64,128,1,fp8,fp8,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,64,128,1,fp8,fp8,0,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,64,128,1,fp8,fp8,0,0.0207893339296182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,float16,0,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,float16,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,fp8,0,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,64,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,float16,0,0.1569973329703013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.15826132893562317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,fp8,0,0.15635733803113303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,64,128,1,fp8,fp8,0,0.14860799908638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.15812800327936807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.14947199821472168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.16050666570663452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,float16,0,0.1602666676044464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,fp8,0,0.1593226691087087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,64,128,1,fp8,fp8,0,0.15410666664441428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.1584106683731079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.15416000286738077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,float16,0,0.16145599881807962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.16246400276819864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,fp8,0,0.15961066881815592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,64,128,1,fp8,fp8,0,0.15917866428693137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.16064533591270447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.1584053337574005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,float16,0,0.09463466207186381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.09496532877286275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,fp8,0,0.09266133109728496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,64,128,1,fp8,fp8,0,0.09477333227793376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.09297600388526917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.08686400453249614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.09477333227793376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,float16,0,0.08829866846402486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,fp8,0,0.08746666709582011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,64,128,1,fp8,fp8,0,0.08237333099047343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.08246399958928426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,float16,0,0.08684800068537395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.08741333087285359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,fp8,0,0.08714133501052856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,64,128,1,fp8,fp8,0,0.08322133123874664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.08702400326728821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.08282133440176646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,float16,0,0.0886293351650238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.08910933136940002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,fp8,0,0.053413331508636475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,64,128,1,fp8,fp8,0,0.08575999736785889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.08847999572753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.08585600058237712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,float16,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,fp8,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,64,128,1,fp8,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,float16,0,0.052704001466433205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,float16,0,0.05189333359400431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,64,128,1,fp8,fp8,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.04972266654173533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,float16,0,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,64,128,1,fp8,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,64,128,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.053077335158983864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,fp8,0,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,fp8,0,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,float16,0,0.03279466678698858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,64,128,1,fp8,fp8,0,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,fp8,0,0.032986665765444435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,64,128,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,float16,0,0.032032000521818794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,64,128,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,64,128,1,fp8,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.03291733314593633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,float16,0,0.032298666735490165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,fp8,0,0.03305600086847941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,float16,0,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,64,128,1,fp8,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,64,128,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,float16,0,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,64,128,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,float16,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,64,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,64,128,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,float16,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,64,128,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,float16,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,float16,0,0.1151626706123352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,fp8,0,0.11558399597803752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.11527466773986816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,64,128,1,fp8,fp8,0,0.10737599929173787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.1151626706123352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.10734400153160095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,float16,0,0.11518399914105733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.11729066570599873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,fp8,0,0.11540800333023071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,64,128,1,fp8,fp8,0,0.10899733503659566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.11544533570607503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.1076639990011851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,float16,0,0.1172320048014323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.11108799775441487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,float16,0,0.066170667608579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.11787199974060059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,fp8,0,0.11608533064524333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,64,128,1,fp8,fp8,0,0.11105599999427795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.11523200074831645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,fp8,0,0.06796266635258992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,64,128,1,fp8,fp8,0,0.06404800216356914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.06595199803511302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.06131199995676676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,float16,0,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.06480533381303151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,fp8,0,0.06414400041103363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,64,128,1,fp8,fp8,0,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,float16,0,0.06601066887378693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.06569066643714905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,fp8,0,0.06610133250554402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,64,128,1,fp8,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.06509866813818614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,float16,0,0.06597866614659627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,fp8,0,0.06607999900976817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,64,128,1,fp8,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.06637866795063019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,64,128,1,fp8,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.06198399762312571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,float16,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.04080000023047129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,64,128,1,fp8,fp8,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,float16,0,0.041221333046754204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,64,128,1,fp8,fp8,0,0.039093332986036934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.039317332208156586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,float16,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,64,128,1,fp8,fp8,0,0.04055466751257578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.04055466751257578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,64,128,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,float16,0,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,fp8,0,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,64,128,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,fp8,0,0.028725333511829376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.02809600035349528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,64,128,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,64,128,1,fp8,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,64,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,float16,0,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,64,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,float16,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,64,128,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,fp8,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,float16,0,0.0929813285668691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.09353066484133403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,fp8,0,0.09333333373069763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,64,128,1,fp8,fp8,0,0.08739200234413147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.09297066926956177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.08760000268618266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,float16,0,0.09406933188438416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.09426666299502055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,fp8,0,0.09443199634552002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,64,128,1,fp8,fp8,0,0.08725333213806152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,fp8,0,0.09474666913350423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.09491200248400371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.08849066495895386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,float16,0,0.09475732843081157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.09433600306510925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,64,128,1,fp8,fp8,0,0.08867200215657552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.09418666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.08915733297665913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,float16,0,0.05541866521040598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.0540533314148585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,fp8,0,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,64,128,1,fp8,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.05583466589450836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,float16,0,0.053717335065205894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,64,128,1,fp8,fp8,0,0.051301335295041404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.0540533314148585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,fp8,0,0.0537120004494985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,64,128,1,fp8,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.052832002441088356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.052058666944503784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,float16,0,0.0545066644748052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,fp8,0,0.0554613322019577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,64,128,1,fp8,fp8,0,0.051781331499417625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,64,128,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.03822399924198786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.03518400092919668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,64,128,1,fp8,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,float16,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.0365280012289683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,64,128,1,fp8,fp8,0,0.036117332677046456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,float16,0,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,fp8,0,0.03692800054947535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,64,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,float16,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,64,128,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,64,128,1,fp8,fp8,0,0.02219199885924657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,float16,0,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,64,128,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,64,128,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,fp8,0,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,float16,0,0.08270933230717976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.0828906645377477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,fp8,0,0.08263466755549113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,64,128,1,fp8,fp8,0,0.07730133334795634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.08242666721343994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.07806399961312611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,float16,0,0.08152533570925395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.08251733581225078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,fp8,0,0.08248533308506012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,64,128,1,fp8,fp8,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.08266133566697438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,float16,0,0.08268266419569652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,fp8,0,0.08257600168387096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,64,128,1,fp8,fp8,0,0.0783679982026418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,64,128,1,fp8,fp8,0,0.048357332746187844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.07854933540026347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.050288001696268715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.05077866713205973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,float16,0,0.049178664882977806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,64,128,1,fp8,fp8,0,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.04962133367856344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,float16,0,0.04910400013128916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.0492799977461497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,64,128,1,fp8,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.049322664737701416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,float16,0,0.05038933455944061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,64,128,1,fp8,fp8,0,0.049269333481788635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,float16,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,64,128,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,64,128,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,float16,0,0.03225066761175791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,64,128,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.030080000559488933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.030896000564098358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,64,128,1,fp8,fp8,0,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,float16,0,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,float16,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,64,128,1,fp8,fp8,0,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,float16,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,64,128,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,64,128,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,64,128,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,64,128,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,fp8,0,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,float16,0,0.4775466521581014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,fp8,0,0.48392534255981445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,64,128,1,fp8,fp8,0,0.44782400131225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,float16,0,0.4933280150095622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,fp8,0,0.49512000878651935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,64,128,1,fp8,fp8,0,0.46348798274993896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,2.9669278462727866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,float16,0,0.5058879852294922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,2.7440532048543296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,2.9636265436808267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,2.9778931935628257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,fp8,0,0.5124586820602417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,64,128,1,fp8,fp8,0,0.48266132672627765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,2.9829066594441733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,2.76036802927653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,float16,0,0.28356800476710003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,fp8,0,0.28991466760635376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,64,128,1,fp8,fp8,0,0.2770773371060689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,2.9935146967569985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,float16,0,0.2547679940859477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,1.5914986928304036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,2.998965263366699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,1.598405361175537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,2.7756532033284507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,1.4754133224487305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,fp8,0,0.2564319968223572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,64,128,1,fp8,fp8,0,0.24038932720820108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,1.5570400555928547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,float16,0,0.25755733251571655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,fp8,0,0.2622986634572347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,64,128,1,fp8,fp8,0,0.24791467189788818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,1.4444799423217773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,1.5552959442138672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,1.5580213864644368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,float16,0,0.2670560081799825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,fp8,0,0.2728640039761861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,1.5641919771830242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,64,128,1,fp8,fp8,0,0.2569013237953186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,1.4507840474446614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,float16,0,0.16126933693885803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,1.567178726196289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,fp8,0,0.164682666460673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,64,128,1,fp8,fp8,0,0.1585599978764852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,0.8726773262023926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,float16,0,0.14222932855288187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,1.4576427141825359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,1.5720960299173992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,0.8747946421305338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,fp8,0,0.1421386698881785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,0.8108747005462646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,64,128,1,fp8,fp8,0,0.1360586682955424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,0.851423978805542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,float16,0,0.14473600188891092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,0.8514453570048014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,fp8,0,0.1477173368136088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,0.7903199990590414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,64,128,1,fp8,fp8,0,0.1418186624844869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,float16,0,0.15061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,0.8515573342641195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,0.7973120212554932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,0.8568320274353027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,fp8,0,0.15411200126012167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,64,128,1,fp8,fp8,0,0.14827199776967367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,0.8609920342763265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,float16,0,0.1136799951394399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,fp8,0,0.11246933539708455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,0.8597333431243896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,0.8013333479563395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,0.5271519819895426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,64,128,1,fp8,fp8,0,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,float16,0,0.11315199732780457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,0.522650678952535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,0.5246880054473877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,0.48655998706817627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,0.5232693354288737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,0.4861706495285034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,fp8,0,0.11314133803049724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,fp8,0,0.1113813320795695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,64,128,1,fp8,fp8,0,0.10702932874361674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,float16,0,0.11309867103894551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,0.5232853492101034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,64,128,1,fp8,fp8,0,0.10705600182215373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,0.5244160095850626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,float16,0,0.11244266231854756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,0.48554666837056476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,fp8,0,0.11319999893506368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,0.5234026511510214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,64,128,1,fp8,fp8,0,0.10724799831708272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,0.5233546495437622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,0.4841013352076213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,64,128,1,fp8,fp8,0,0.3375733296076457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,float16,0,0.36019734541575116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,fp8,0,0.36294400691986084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,float16,0,0.3685226837793986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,fp8,0,0.37275199095408124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,1.7624212900797527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,64,128,1,fp8,fp8,0,0.34940799077351886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,1.6355466842651367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,1.7700907389322917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,float16,0,0.3802613417307536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,fp8,0,0.3853706518809001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,1.773589293162028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,64,128,1,fp8,fp8,0,0.36221333344777423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,1.7833120028177898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,1.6502666473388672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,float16,0,0.2193173368771871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,1.7858559290568035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,fp8,0,0.22403200467427573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,64,128,1,fp8,fp8,0,0.2139093279838562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,0.9671839872996012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,float16,0,0.1930720011393229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,1.7934293746948242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,1.660842736562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,0.9716160297393799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,fp8,0,0.19513599077860513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,0.9027520020802816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,0.9404640197753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,64,128,1,fp8,fp8,0,0.18555200099945068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,float16,0,0.1985386610031128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,0.9404426415761312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,fp8,0,0.20005865891774496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,0.8713653087615967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,64,128,1,fp8,fp8,0,0.19124799966812134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,0.9423147042592367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,float16,0,0.20709866285324097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,0.943008025487264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,fp8,0,0.2096959948539734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,0.8771626949310303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,64,128,1,fp8,fp8,0,0.19921600818634033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,0.9522826671600342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,float16,0,0.1277653376261393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,fp8,0,0.13014400005340576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,0.9563466707865397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,0.8856053352355957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,64,128,1,fp8,fp8,0,0.12692266702651978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,0.5424746672312418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,float16,0,0.1143999993801117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,0.5455946524937948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,fp8,0,0.11538666486740112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,0.5062666734059652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,64,128,1,fp8,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,0.5253920157750448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,0.5283199946085612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,float16,0,0.11583999792734782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,0.4875893195470174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,fp8,0,0.11668800314267476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,64,128,1,fp8,fp8,0,0.1106666624546051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,0.5293386777242025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,0.5300000111262003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,0.5311520099639893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,float16,0,0.12026133139928182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,0.4922240177790324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,fp8,0,0.12141333023707072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,64,128,1,fp8,fp8,0,0.11850133538246155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,float16,0,0.08680533369382222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,0.532693346341451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,0.49934931596120197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,fp8,0,0.08711999654769897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,0.33639466762542725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,64,128,1,fp8,fp8,0,0.08475200335184734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,0.3360533316930135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,float16,0,0.08825600147247314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,0.3120479981104533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,fp8,0,0.0885706643263499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,0.3337973356246948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,64,128,1,fp8,fp8,0,0.08452266454696655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,0.33646400769551593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,0.3124426603317261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,float16,0,0.0867680013179779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,fp8,0,0.08779733379681905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,0.3356106678644816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,64,128,1,fp8,fp8,0,0.08454933762550354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,0.3340799808502197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,float16,0,0.08797867099444072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,0.3139520088831584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,fp8,0,0.08790399630864461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,0.3364106814066569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,64,128,1,fp8,fp8,0,0.08436266581217448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,0.33499733606974286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,0.31380800406138104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,float16,0,0.3020000060399373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,fp8,0,0.30505599578221637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,1.2852319876352947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,float16,0,0.30867733558019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,64,128,1,fp8,fp8,0,0.2850133379300435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,fp8,0,0.3118773301442464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,1.28548264503479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,1.1887253125508626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,64,128,1,fp8,fp8,0,0.2933280070622762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,float16,0,0.3185546596844991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,1.2905813058217366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,fp8,0,0.32411734263102215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,64,128,1,fp8,fp8,0,0.30426132678985596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,1.2961119810740154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,1.2003733317057292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,float16,0,0.18375466267267862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,fp8,0,0.1871359944343567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,1.2964426676432292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,64,128,1,fp8,fp8,0,0.17902400096257529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,0.7114773591359457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,1.3092586994171143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,1.2092959880828857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,float16,0,0.159770667552948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,64,128,1,fp8,fp8,0,0.15636799732844034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,0.7167840003967285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,0.6679733594258627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,fp8,0,0.16245866815249124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,0.6878080368041992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,float16,0,0.16501333316167197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,0.69158935546875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,0.6420640150705973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,fp8,0,0.16695467631022134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,0.6943093140920004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,float16,0,0.17156267166137695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,64,128,1,fp8,fp8,0,0.16028799613316855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,0.6898132960001627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,0.6465866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,fp8,0,0.17493333419164023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,64,128,1,fp8,fp8,0,0.16808533668518066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,0.6993599732716879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,float16,0,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,0.40354132652282715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,0.7035360336303711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,0.6530880133310953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,fp8,0,0.10921066999435425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,64,128,1,fp8,fp8,0,0.10733866691589355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,0.4078986644744873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,float16,0,0.09719467163085938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,0.38091198603312176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,fp8,0,0.09781866272290547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,0.3932693401972453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,64,128,1,fp8,fp8,0,0.09122666716575623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,0.394320011138916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,float16,0,0.09703999757766724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,0.36557865142822266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,fp8,0,0.09884799520174663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,0.39396798610687256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,64,128,1,fp8,fp8,0,0.09299733241399129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,0.39584533373514813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,float16,0,0.10084266463915507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,0.3666079839070638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,fp8,0,0.10116266210873921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,0.39616533120473224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,fp8,0,0.07829333345095317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,64,128,1,fp8,fp8,0,0.09915733337402344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,0.39957332611083984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,float16,0,0.0784800002972285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,0.3734453519185384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.2627519965171814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.2623093326886495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,64,128,1,fp8,fp8,0,0.07441600163777669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.26172266403834027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,0.24412266413370767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,float16,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.2626346747080485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,64,128,1,fp8,fp8,0,0.07431999842325847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.26268800099690753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,float16,0,0.07825600107510884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,0.2450773318608602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,fp8,0,0.07842666904131572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,64,128,1,fp8,fp8,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.26293333371480304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,0.2430186669031779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,float16,0,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.261354664961497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,fp8,0,0.07842666904131572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,64,128,1,fp8,fp8,0,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.261680006980896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,0.24457067251205444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,float16,0,0.46765867869059247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,fp8,0,0.471504012743632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,64,128,1,fp8,fp8,0,0.4359360138575236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,float16,0,0.47945066293080646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,1.6718079249064128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,fp8,0,0.48533864816029865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,64,128,1,fp8,fp8,0,0.4534720182418823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,1.6764426231384277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,1.5503840446472168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,float16,0,0.4956959883371989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,1.6916106541951497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,fp8,0,0.502623995145162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,64,128,1,fp8,fp8,0,0.4714346726735433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,1.6913280487060547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,1.5703412691752117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,float16,0,0.2720800042152405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,fp8,0,0.279039998849233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,1.707215944925944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,64,128,1,fp8,fp8,0,0.26392533381779987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,0.912986675898234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,1.7116907437642415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,1.5884532928466797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,float16,0,0.24116265773773193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,fp8,0,0.2437173326810201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,0.9183893203735352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,64,128,1,fp8,fp8,0,0.22854934136072794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,0.8502879937489828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,0.8744160334269205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,0.8169866402943929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,float16,0,0.24631466468175253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,0.8751306533813477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,fp8,0,0.2503146727879842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,float16,0,0.25571733713150024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,64,128,1,fp8,fp8,0,0.23666133483250937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,0.879477341969808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,0.8847893079121908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,64,128,1,fp8,fp8,0,0.24665067593256632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,fp8,0,0.2598506609598796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,0.8221386273701986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,0.8899679978688558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,float16,0,0.1479146679242452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,0.8956747055053711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,64,128,1,fp8,fp8,0,0.14587733149528503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,fp8,0,0.15121600031852722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,0.8333760102589926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,0.4979999860127767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,0.5004533529281616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,float16,0,0.12773866454760233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,0.46484267711639404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,fp8,0,0.1295360028743744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,0.47518400351206463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,64,128,1,fp8,fp8,0,0.12241066495577495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,0.4773333470026652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,float16,0,0.13142399986584982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,0.44272534052530926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,fp8,0,0.13265599807103476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,0.47627198696136475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,64,128,1,fp8,fp8,0,0.12772267063458762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,0.48000534375508624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,float16,0,0.13636799653371176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,0.4482613404591878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,fp8,0,0.13871467113494873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,0.4840693473815918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,64,128,1,fp8,fp8,0,0.13523200154304504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,float16,0,0.08664533495903015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,0.48760533332824707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.28651199738184613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,0.4567840099334717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,fp8,0,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,fp8,0,0.08036266764005025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,64,128,1,fp8,fp8,0,0.08847999572753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.28991466760635376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,0.2732853293418884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,float16,0,0.08054933448632558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.2797066569328308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,64,128,1,fp8,fp8,0,0.07664533456166585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,64,128,1,fp8,fp8,0,0.07678399980068207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.28243199984232586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,0.2606613238652547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,float16,0,0.08105066418647766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.2810879945755005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,fp8,0,0.08248533308506012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.28300267457962036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,0.26260266701380414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,float16,0,0.08233066896597545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,0.2630186676979065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,fp8,0,0.08444799979527791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.28438933690388996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,64,128,1,fp8,fp8,0,0.08066133161385854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.28380799293518066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,float16,0,0.06252266466617584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.19448532660802206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,fp8,0,0.0625546673933665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,64,128,1,fp8,fp8,0,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.19327465693155924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.18116267522176108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,float16,0,0.06243733565012614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,float16,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.19404266277949014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,fp8,0,0.06258133550484975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,64,128,1,fp8,fp8,0,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.19366933902104697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.17974400520324707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.19474667310714722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,64,128,1,fp8,fp8,0,0.05986666679382324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.19528534015019736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.18149334192276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,float16,0,0.06206400195757548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.19430933396021524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,64,128,1,fp8,fp8,0,0.05977599819501241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.19325333833694458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.18082133928934732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,float16,0,0.35161598523457843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,fp8,0,0.3547360102335612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,64,128,1,fp8,fp8,0,0.33034666379292804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,1.0271626313527424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,1.0289493401845295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,float16,0,0.36128000418345135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,0.9536853631337484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,1.0374346574147542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,fp8,0,0.36564799149831134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,64,128,1,fp8,fp8,0,0.34069867928822833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,float16,0,0.37236801783243817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,1.039904038111369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,0.9637813568115234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,fp8,0,0.3768373330434163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,64,128,1,fp8,fp8,0,0.3556693394978841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,1.0516693592071533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,fp8,0,0.2152000069618225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,float16,0,0.20986666282018027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,1.0530613263448079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,0.5690133174260458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,0.5742559830347697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,0.9820586840311686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,64,128,1,fp8,fp8,0,0.20413333177566528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,float16,0,0.1830400029818217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,0.5347146590550741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,fp8,0,0.18504534165064493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,0.540832002957662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,64,128,1,fp8,fp8,0,0.17688000202178955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,0.5427039861679077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,float16,0,0.18924800554911295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,0.5087466637293497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,fp8,0,0.19058134158452353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,0.5454613367716471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,64,128,1,fp8,fp8,0,0.18184000253677368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,0.5474186738332113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,fp8,0,0.19932266076405844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,float16,0,0.19730132818222046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,0.5108906825383505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,float16,0,0.11725333333015442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,0.5582293272018433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,64,128,1,fp8,fp8,0,0.18951465686162314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,0.5574560165405273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,0.5195039908091227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,0.3184640010197957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,fp8,0,0.11931733290354411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,64,128,1,fp8,fp8,0,0.11672000090281169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,0.3203253348668416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,0.29803200562795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,float16,0,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,0.30182933807373047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,fp8,0,0.10428800185521443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,0.3014880021413167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,64,128,1,fp8,fp8,0,0.09669867157936096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,0.27994134028752643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,float16,0,0.10495466987291972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,0.3043839931488037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.3011946678161621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,fp8,0,0.10518399874369304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,64,128,1,fp8,fp8,0,0.09936533371607463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,0.28304533163706463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,float16,0,0.10799466570218404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,fp8,0,0.11043733358383179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.3067893385887146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,64,128,1,fp8,fp8,0,0.10700800021489461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,64,128,1,fp8,fp8,0,0.06727466483910878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.30832000573476154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.18980266650517783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,0.28987733523050946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,float16,0,0.0673333356777827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.18765334288279215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,fp8,0,0.07019733389218648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,float16,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.18515199422836304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.1769919991493225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.18278400103251138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,64,128,1,fp8,fp8,0,0.06117333471775055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.17088532447814941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,float16,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,fp8,0,0.06565333406130473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.18387200435002646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,64,128,1,fp8,fp8,0,0.06121600170930227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.18310399850209555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.17082667350769043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,float16,0,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.1844480037689209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,64,128,1,fp8,fp8,0,0.06229866544405619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.18688533703486124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,float16,0,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.1726026733716329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.12983466188112894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,fp8,0,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,64,128,1,fp8,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.13038399815559387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.12156266967455547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,float16,0,0.05329599976539612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,float16,0,0.05338133374849955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.13053866227467856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.13125333189964294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,64,128,1,fp8,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.13014933466911316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.12205866972605388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,fp8,0,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,64,128,1,fp8,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.1297920048236847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.12204266587893169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,float16,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.12958932916323343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,fp8,0,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,64,128,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.13074666261672974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.12307199835777283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,float16,0,0.4639413356781006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,fp8,0,0.4694186846415202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,64,128,1,fp8,fp8,0,0.4345066547393799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,1.0281973679860432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,float16,0,0.47704533735911053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,1.0286133289337158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,0.9497280120849609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,fp8,0,0.48206933339436847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,64,128,1,fp8,fp8,0,0.4482133388519287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,1.04311998685201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,float16,0,0.4930826822916667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,1.0447306632995605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,0.9670133590698242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,fp8,0,0.4986026684443156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,64,128,1,fp8,fp8,0,0.4662880102793376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,1.053978681564331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,float16,0,0.26839999357859295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,1.06222931543986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,fp8,0,0.2742080092430115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,0.5676853259404501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,64,128,1,fp8,fp8,0,0.2598506609598796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,float16,0,0.2362933357556661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,0.5724533398946127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,0.9860906600952148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,0.5328960021336874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,fp8,0,0.2405173381169637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,0.5300426483154297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,64,128,1,fp8,fp8,0,0.22489599386850992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,0.5331733226776123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,float16,0,0.24211200078328451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,0.49723200003306073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,fp8,0,0.2453440030415853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,0.5380213260650635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,64,128,1,fp8,fp8,0,0.23282132546106973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,0.5386879841486613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,float16,0,0.2525706688563029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,0.5041173299153646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,fp8,0,0.25482134024302167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,0.5479573408762614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,64,128,1,fp8,fp8,0,0.2421440084775289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,float16,0,0.14173332850138345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,0.5515573422114054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,0.5126506487528483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,0.3062293330828349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,fp8,0,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,64,128,1,fp8,fp8,0,0.14103999733924866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,0.3110719919204712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,float16,0,0.12158933281898499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,0.2894773284594218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.28401599327723187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,fp8,0,0.123471995194753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,64,128,1,fp8,fp8,0,0.11795199910799663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.2860213319460551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,0.26609599590301514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,float16,0,0.12531733512878418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,fp8,0,0.1260693371295929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.2890506585439046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,64,128,1,fp8,fp8,0,0.12366400162378947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.28949334224065143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,0.2735626697540283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,float16,0,0.13185066978136697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.2935626705487569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,fp8,0,0.1332373321056366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,64,128,1,fp8,fp8,0,0.12980266412099203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.29710400104522705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,0.27898667256037396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,float16,0,0.08064533273379008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.17410133282343546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,fp8,0,0.08237866560618083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,fp8,0,0.07400533556938171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,64,128,1,fp8,fp8,0,0.0823520024617513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.1760586698849996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.1677173376083374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,float16,0,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.16612266500790915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,64,128,1,fp8,fp8,0,0.06938133140405019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.16741333405176798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.15445866187413534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,float16,0,0.07439466814200084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.16714133818944296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,fp8,0,0.0763679991165797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,64,128,1,fp8,fp8,0,0.07027733325958252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,64,128,1,fp8,fp8,0,0.07230400045712788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.16801599661509195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.15639467040697733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,float16,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.1686240037282308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,fp8,0,0.051632001996040344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,64,128,1,fp8,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.1705440084139506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,float16,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.15916799505551657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,float16,0,0.04994133114814758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.11161067088445027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.11356266339619954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.1069493293762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.1092693308989207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,fp8,0,0.04664533336957296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,64,128,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,64,128,1,fp8,fp8,0,0.045466666420300804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.11136533816655476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.10287466645240784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,float16,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.10991467038790385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,fp8,0,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.1111893355846405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.10322667161623637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,float16,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.11148800452550252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,64,128,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,fp8,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.10358933607737224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.07673599819342296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.07845333218574524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.07235200206438701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,float16,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.07824000219504039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,fp8,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,64,128,1,fp8,fp8,0,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.07827199995517731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.07418133318424225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,float16,0,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.07387199997901917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.07770666480064392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.07843733330567677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,fp8,0,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,64,128,1,fp8,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.07844266792138417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.07851199805736542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,float16,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,64,128,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.07256000240643819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,float16,0,0.35598401228586835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,fp8,0,0.35749868551890057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,64,128,1,fp8,fp8,0,0.332586665948232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,0.6607573429743449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,0.6138453483581543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,0.6596693197886149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,float16,0,0.3667840162913005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,fp8,0,0.36799999078114826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,0.6708906491597494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,64,128,1,fp8,fp8,0,0.3441280126571655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,float16,0,0.3819040060043335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,0.6700373490651449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,0.6259680191675822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,fp8,0,0.381386677424113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,0.6836053530375162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,0.6369653145472208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,64,128,1,fp8,fp8,0,0.3585066795349121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,float16,0,0.2107893427213033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,0.6868693033854166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,0.3737493356068929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,0.37110400199890137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,fp8,0,0.2136533260345459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,64,128,1,fp8,fp8,0,0.20259199539820352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,0.3503359953562419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,float16,0,0.18106132745742798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,0.3431679805119832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,fp8,0,0.18318933248519897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,64,128,1,fp8,fp8,0,0.17488000790278116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,0.34257598718007404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,64,128,1,fp8,fp8,0,0.1789813240369161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,0.3238079945246379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,float16,0,0.1865760087966919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,0.34530667463938397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,0.3564480145772298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,fp8,0,0.18824533621470133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,0.34864532947540283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,0.3286186655362447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,0.35868267218271893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,float16,0,0.19667200247446695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,fp8,0,0.19746132691701254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,64,128,1,fp8,fp8,0,0.18793600797653198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,float16,0,0.11311466495196025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.2048906683921814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,0.3380320072174072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.20267200469970703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,fp8,0,0.09912000099817912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,fp8,0,0.11601066589355469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,64,128,1,fp8,fp8,0,0.11352533102035522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.1946293314297994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,float16,0,0.09723200400670369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.18534932533899942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,fp8,0,0.10194666186968486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,64,128,1,fp8,fp8,0,0.09274133046468098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.1872053345044454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.17402132352193198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,float16,0,0.1032426655292511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,float16,0,0.09956266482671101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.1872426668802897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,64,128,1,fp8,fp8,0,0.09675733248392741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.18919465939203897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.17849600315093994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.19155200322469076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,fp8,0,0.10526399811108907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,64,128,1,fp8,fp8,0,0.10285333792368571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,64,128,1,fp8,fp8,0,0.0631573349237442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.19451733430226645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.18473599354426065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.11750933527946472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,fp8,0,0.06573333342870076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,64,128,1,fp8,fp8,0,0.055888002117474876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.11972266435623169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.11282666524251302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,float16,0,0.058133333921432495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.11282666524251302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.11388799548149109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,64,128,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.10725866754849751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,float16,0,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,fp8,0,0.06002666552861532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.11629333098729451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.10636267066001892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,float16,0,0.060133333007494606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.11521599690119426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,fp8,0,0.06154666841030121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,64,128,1,fp8,fp8,0,0.059258664647738137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.11624000469843547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.07620266576608022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,float16,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.07832000156243642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,fp8,0,0.043925335009892784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,64,128,1,fp8,fp8,0,0.04295999805132548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.0803466687599818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,float16,0,0.04203199843565623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,64,128,1,fp8,fp8,0,0.0401653324564298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.0721973329782486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,float16,0,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.07706133524576823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,64,128,1,fp8,fp8,0,0.041375999649365745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.07844266792138417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.07863466441631317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.07273066540559132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,float16,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.07829866806666057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,64,128,1,fp8,fp8,0,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.07387199997901917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,float16,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.061994666854540505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,fp8,0,0.03275199979543686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,64,128,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,fp8,0,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,64,128,1,fp8,fp8,0,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.061520000298817955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,float16,0,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.06128533184528351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.06113066772619883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,fp8,0,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.05807466804981232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,fp8,0,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,64,128,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.06188266475995382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,float16,0,0.4806133508682251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,fp8,0,0.4813493490219116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,0.7137706279754639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,64,128,1,fp8,fp8,0,0.4419999917348226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,0.7135840257008871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,0.6585280100504557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,float16,0,0.49767998854319256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,0.7320586840311686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,fp8,0,0.5006719827651978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,64,128,1,fp8,fp8,0,0.45533867677052814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,0.7345279852549235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,0.6703253587086996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,fp8,0,0.510752002398173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,float16,0,0.5122026602427164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,0.743664026260376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,64,128,1,fp8,fp8,0,0.4684106508890788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,float16,0,0.27565866708755493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,0.7455893357594808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,0.6871146361033121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,0.3972959915796916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,fp8,0,0.2786773244539897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,64,128,1,fp8,fp8,0,0.2644853393236796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,0.40243732929229736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,0.3779040177663167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,float16,0,0.23843199014663696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,0.36162134011586505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,fp8,0,0.2408533294995626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,64,128,1,fp8,fp8,0,0.22530666987101236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,0.364682674407959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,0.3403786818186442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,float16,0,0.244159996509552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,0.3670346736907959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,fp8,0,0.24717867374420166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,64,128,1,fp8,fp8,0,0.23401067654291788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,0.3700480063756307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,0.347109317779541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,float16,0,0.25465599695841473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,0.38171199957529706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,0.3803040186564128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,fp8,0,0.2564799984296163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,64,128,1,fp8,fp8,0,0.24235733350118002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,float16,0,0.14376533031463623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,0.3571306864420573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.21308799584706625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,float16,0,0.12020799517631531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,fp8,0,0.14532267053922018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,64,128,1,fp8,fp8,0,0.14083733161290488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.21412267287572226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.20358934005101523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.18849599361419678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,fp8,0,0.12138666709264119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,64,128,1,fp8,fp8,0,0.11585066715876262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.19107200702031454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.18075199921925864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,float16,0,0.12294933199882507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.19170665740966797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,fp8,0,0.1240053375562032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,64,128,1,fp8,fp8,0,0.12164800365765889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.19948800404866537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.1949653426806132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.1851253310839335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,float16,0,0.1318986713886261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.1930826703707377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,fp8,0,0.13190933068593344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,64,128,1,fp8,fp8,0,0.12882133324941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,64,128,1,fp8,fp8,0,0.08060800035794576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.20220265785853067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,float16,0,0.07783466577529907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.11757333079973857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,fp8,0,0.0803306649128596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,64,128,1,fp8,fp8,0,0.06805866460005443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.11931199828783672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.11687999963760376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,float16,0,0.07022400200366974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.10917866230010986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,fp8,0,0.07308266560236613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,fp8,0,0.07076266904671986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.11146133144696553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,float16,0,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.1109279990196228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,64,128,1,fp8,fp8,0,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,64,128,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.11243733763694763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.11487999558448792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.1049066682656606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,float16,0,0.07321600119272868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.11277332901954651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,fp8,0,0.07457600037256877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.10758933424949646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.07052800059318542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,float16,0,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.07389333347479503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,fp8,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,64,128,1,fp8,fp8,0,0.04620266457398733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.07043733199437459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.07437866429487865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,float16,0,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.07030933101971944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.07044800122578938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,fp8,0,0.044981335600217186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,64,128,1,fp8,fp8,0,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,float16,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,fp8,0,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,fp8,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,64,128,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.07157866656780243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,float16,0,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.07147199908892314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,64,128,1,fp8,fp8,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.07394666473070781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.04957333207130432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.06950399776299794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,fp8,0,0.031109333038330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,fp8,0,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,64,128,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,64,128,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,float16,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,64,128,1,fp8,fp8,0,0.03035733352104823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,float16,0,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.04820266862710317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,64,128,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.04304533203442892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,float16,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,fp8,0,0.02903999884923299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.04312000175317129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,64,128,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,64,128,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,float16,0,0.35608001550038654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,fp8,0,0.35891199111938477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.04451733330885569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,0.4732160170873006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,0.4405119816462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,0.47306664784749347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,64,128,1,fp8,fp8,0,0.3316799998283386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,float16,0,0.36979198455810547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,0.4910240173339844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,fp8,0,0.3727840185165405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,float16,0,0.3825920025507609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,64,128,1,fp8,fp8,0,0.3468266725540161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,0.45370133717854816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,0.4891146818796794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,0.5017333428064982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,fp8,0,0.38468801975250244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,64,128,1,fp8,fp8,0,0.3580053249994914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,0.502239982287089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,64,128,1,fp8,fp8,0,0.20424532890319824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,0.4660960038503011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,float16,0,0.21034133434295654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,0.27108800411224365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,fp8,0,0.21414933602015176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,0.27729066212972003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,0.2617493271827698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,float16,0,0.18107199668884277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.2412373423576355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,fp8,0,0.18131732940673828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,64,128,1,fp8,fp8,0,0.173962672551473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,fp8,0,0.18863999843597412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.2425866723060608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.23137599229812622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,float16,0,0.1872319976488749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.2476960023244222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,64,128,1,fp8,fp8,0,0.1791306734085083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.2486720085144043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,64,128,1,fp8,fp8,0,0.187882661819458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.23725332816441855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,float16,0,0.19567465782165527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.2553973396619161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,fp8,0,0.1981066664059957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.2607733408610026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,0.24671467145284018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,float16,0,0.1111253301302592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.14657599727312723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,fp8,0,0.11409599582354228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,64,128,1,fp8,fp8,0,0.11264533797899882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.14857066671053568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.14352533221244812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,float16,0,0.09612799684206645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.13065066933631897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,fp8,0,0.09833066662152608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,64,128,1,fp8,fp8,0,0.09057600299517314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.1317813297112783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.123690664768219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,float16,0,0.09815999865531921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.13301333785057068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,fp8,0,0.10086400310198466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,64,128,1,fp8,fp8,0,0.09472533067067464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.13548800349235535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.1272426644961039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,float16,0,0.10204266508420308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.1365493337313334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,fp8,0,0.10495466987291972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,fp8,0,0.06413866579532623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,64,128,1,fp8,fp8,0,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.1397706667582194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.1338879962762197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,float16,0,0.06160533428192139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.08261866867542267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,64,128,1,fp8,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.08483733733495076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.0803306649128596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.07838933169841766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,fp8,0,0.057520002126693726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.078575998544693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,64,128,1,fp8,fp8,0,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,64,128,1,fp8,fp8,0,0.054005334774653115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.07858666777610779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,float16,0,0.05745066702365875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,fp8,0,0.05899199843406677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.08045866588751475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.0745119998852412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.08042133351167043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,fp8,0,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,64,128,1,fp8,fp8,0,0.05684266487757365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.08296533425649007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.05594133337338766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,float16,0,0.04133866727352142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,fp8,0,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,64,128,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.05212266743183136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.05356800059477488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,fp8,0,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.05340266724427541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.0491893341143926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,fp8,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.053082664807637535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,64,128,1,fp8,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.05312533179918925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,float16,0,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,fp8,0,0.04022933294375738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,64,128,1,fp8,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,64,128,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,fp8,0,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,64,128,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,float16,0,0.028927999238173168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,64,128,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,64,128,1,fp8,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.03523733218510946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.03730666637420654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,64,128,1,fp8,fp8,0,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,float16,0,0.4194186528523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,64,128,1,fp8,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,0.49219731489817303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,fp8,0,0.41812264919281006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,64,128,1,fp8,fp8,0,0.38995198408762616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,0.4936106602350871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,0.45578666528066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,float16,0,0.4240640004475911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,0.49641601244608563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,fp8,0,0.42450666427612305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,64,128,1,fp8,fp8,0,0.39505600929260254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,0.49642666180928546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,0.4668639898300171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,float16,0,0.4325973192850749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,0.5048799912134806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,fp8,0,0.4339253505071004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,64,128,1,fp8,fp8,0,0.41808001200358075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,0.5090719858805338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,float16,0,0.22637333472569784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,0.48959465821584064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,0.2667093276977539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,0.2658613324165344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,fp8,0,0.22428266207377115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,64,128,1,fp8,fp8,0,0.22816000382105509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,0.2664533257484436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,float16,0,0.21919467051823935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.2579200069109599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,fp8,0,0.21794666846593222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,64,128,1,fp8,fp8,0,0.2034613291422526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.260261336962382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.25677333275477093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.24020800987879434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,float16,0,0.22170132398605347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,fp8,0,0.2206453283627828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,64,128,1,fp8,fp8,0,0.2072426676750183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.25940799713134766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.24449066321055093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,float16,0,0.22603732347488403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.264847993850708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,fp8,0,0.22580800453821817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,64,128,1,fp8,fp8,0,0.21426665782928467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.2664373318354289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,0.25109867254892987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,float16,0,0.12343999743461609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.1453120013078054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,fp8,0,0.12081600228945415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,64,128,1,fp8,fp8,0,0.12199999888737996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.14251733819643655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.14382933576901755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,float16,0,0.11549333731333415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.12943466504414877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,float16,0,0.11550399661064148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.1369493305683136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,fp8,0,0.11479467153549194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,64,128,1,fp8,fp8,0,0.1074079970518748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.13577600320180258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.13900799552599588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,fp8,0,0.11731732885042827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,64,128,1,fp8,fp8,0,0.11110400160153706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.13806933164596558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.131632000207901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.14216533303260803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,float16,0,0.12097600102424622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.14220800002415976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,fp8,0,0.12197333574295044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,64,128,1,fp8,fp8,0,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.13566933075586954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,float16,0,0.0662666658560435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,float16,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.07887466748555501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,fp8,0,0.06530133386452992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,fp8,0,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.0784800002972285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,64,128,1,fp8,fp8,0,0.07002133131027222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.07970666885375977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.0824480007092158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.07805866499741872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,64,128,1,fp8,fp8,0,0.06224533418814341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,64,128,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.07348800202210744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.07309866448243459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,float16,0,0.06458666423956554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.07831466694672902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,64,128,1,fp8,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,fp8,0,0.06597866614659627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.07835733393828075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,float16,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.07930133243401845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,fp8,0,0.0662666658560435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.0782773345708847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.07627200086911519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,float16,0,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.05166399975617727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,64,128,1,fp8,fp8,0,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,float16,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.049738665421803795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,fp8,0,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,64,128,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,64,128,1,fp8,fp8,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.05161599814891815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,float16,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,fp8,0,0.0425546665986379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,64,128,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,64,128,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,float16,0,0.02698666602373123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,64,128,1,fp8,fp8,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,64,128,1,fp8,fp8,0,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.031845333675543465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,64,128,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,64,128,1,fp8,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,float16,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,64,128,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,fp8,0,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.028186666468779247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.02845866729815801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,fp8,0,0.02258133391539256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,64,128,1,fp8,fp8,0,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,float16,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,64,128,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,float16,0,0.410591999689738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.026709333062171936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,0.4151680072148641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,0.40932265917460126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,fp8,0,0.404207984606425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,64,128,1,fp8,fp8,0,0.3789173364639282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,0.3845866521199544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,float16,0,0.4140106836954753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,0.4164693355560303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,fp8,0,0.4137813250223796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,0.3877813418706258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,64,128,1,fp8,fp8,0,0.3852320114771525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,0.41280531883239746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,float16,0,0.4222293297449748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,0.4278133312861125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,fp8,0,0.42347200711568195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,64,128,1,fp8,fp8,0,0.4107360045115153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,0.42906665802001953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,float16,0,0.21987199783325195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,0.40858133633931476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.22377600272496542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,0.2227733333905538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,fp8,0,0.21626667181650797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,64,128,1,fp8,fp8,0,0.22104533513387045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,fp8,0,0.21176532904307047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,0.2218666672706604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.21377599239349365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,float16,0,0.21369600296020508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.21554666757583618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,64,128,1,fp8,fp8,0,0.19748800992965698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.20147200425465903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,float16,0,0.2158720095952352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.21902932723363241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,fp8,0,0.21389333407084146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,64,128,1,fp8,fp8,0,0.20121065775553384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.21798932552337646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.20360000928243002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,float16,0,0.22099733352661133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.2229599952697754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.2113920052846273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,fp8,0,0.22148799896240234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.1209386686484019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,64,128,1,fp8,fp8,0,0.21173866589864096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.22302399079004923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,float16,0,0.11865066488583882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,fp8,0,0.11827733119328816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,64,128,1,fp8,fp8,0,0.11953600247701009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.11993066469828288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,float16,0,0.11311466495196025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.11521599690119426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,fp8,0,0.11114666859308879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,64,128,1,fp8,fp8,0,0.10604799787203471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.1144586702187856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.10905067125956218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,float16,0,0.11370133360226949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.11528533697128296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,fp8,0,0.11410666505495708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,64,128,1,fp8,fp8,0,0.10850666960080464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.11545067032178243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.11135466893513997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,float16,0,0.11868266264597575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.1195146640141805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,fp8,0,0.11715733011563619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,64,128,1,fp8,fp8,0,0.11400000254313152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.12097066640853882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.11539733409881592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.06809066732724507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,float16,0,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.06373333434263866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.06611733138561249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,fp8,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,64,128,1,fp8,fp8,0,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.0656160016854604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,float16,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,float16,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,64,128,1,fp8,fp8,0,0.0595360000928243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.0629013329744339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,float16,0,0.06487999856472015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.06507200002670288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,fp8,0,0.06433600187301636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,64,128,1,fp8,fp8,0,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.06458666423956554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.06633066634337108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,fp8,0,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,64,128,1,fp8,fp8,0,0.06333333253860474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,64,128,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.06408533453941345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,float16,0,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.04354666670163473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,fp8,0,0.04065066576004028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.04284266630808512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,float16,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.042463997999827065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.04218133290608724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,fp8,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,64,128,1,fp8,fp8,0,0.03952533255020777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.0432533323764801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,64,128,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.04029866556326548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,float16,0,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.042303999265034996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,float16,0,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,64,128,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,64,128,1,fp8,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,float16,0,0.026352000733216602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,64,128,1,fp8,fp8,0,0.02672533442576726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,float16,0,0.026672000686327618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,64,128,1,fp8,fp8,0,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,float16,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,float16,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,64,128,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,fp8,0,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,64,128,1,fp8,fp8,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,fp8,0,0.02232533444960912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,fp8,0,0.020560000091791153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,64,128,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.02186133215824763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,float16,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,fp8,0,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,float16,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,fp8,0,0.020341333001852036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,float16,0,0.19539733727773032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.19125866889953613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,fp8,0,0.1943946679433187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,64,128,1,fp8,fp8,0,0.18316266934076944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.19589867194493613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.1900320053100586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,64,128,1,fp8,fp8,0,0.1851039926211039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.17880533138910928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,float16,0,0.1995840072631836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,fp8,0,0.1976426641146342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.19537067413330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.1827359994252523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,64,128,1,fp8,fp8,0,0.19318934281667074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,float16,0,0.20641599098841348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.20346667369206747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,fp8,0,0.20562666654586792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.20134933789571127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,float16,0,0.11103999614715576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.19196800390879312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.10899733503659566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,fp8,0,0.1095306674639384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,float16,0,0.10326932867368062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,64,128,1,fp8,fp8,0,0.1114453375339508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,fp8,0,0.10260799527168274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.10832533240318298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.1097813347975413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.10108266274134318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,64,128,1,fp8,fp8,0,0.09692800045013428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.09888000289599101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.09463999668757121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,float16,0,0.10541866223017375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.1032480001449585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,fp8,0,0.1046506663163503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,64,128,1,fp8,fp8,0,0.09938133756319682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.10290132959683736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.09701866904894511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,float16,0,0.1092746655146281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.10773866375287373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,fp8,0,0.10910399754842122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,64,128,1,fp8,fp8,0,0.10353599985440572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.10668266812960307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,float16,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,fp8,0,0.06117866436640421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,64,128,1,fp8,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.058517331878344216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,float16,0,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,float16,0,0.06028800209363302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,fp8,0,0.05945600072542826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,64,128,1,fp8,fp8,0,0.05621333420276642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.05793599784374237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.059877331058184304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,fp8,0,0.05923733115196228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,64,128,1,fp8,fp8,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,float16,0,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,float16,0,0.03875199953715006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,fp8,0,0.03872533390919367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,fp8,0,0.0621066689491272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,64,128,1,fp8,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.05966933568318685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,64,128,1,fp8,fp8,0,0.0386559988061587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.03721066564321518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,float16,0,0.038378665844599404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,float16,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,fp8,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,64,128,1,fp8,fp8,0,0.036559998989105225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,float16,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,64,128,1,fp8,fp8,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.03648533423741659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,float16,0,0.024959998826185863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,64,128,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,64,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,64,128,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,64,128,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.01814933369557063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,64,128,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,float16,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,64,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,float16,0,0.1091306706269582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.10914132992426555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,fp8,0,0.10729599992434184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,64,128,1,fp8,fp8,0,0.10300266742706299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.10738133390744527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.10453333457310994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,float16,0,0.11070400476455688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.11055999994277954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,fp8,0,0.10874666770299275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,64,128,1,fp8,fp8,0,0.10494400064150493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.1090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.10310399532318115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,float16,0,0.11386666695276897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.11353066563606262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,fp8,0,0.11222400267918904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,64,128,1,fp8,fp8,0,0.11012799541155498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.11102400223414104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.11064533392588298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,64,128,1,fp8,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,float16,0,0.06217066446940104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,float16,0,0.0603413333495458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.061280002196629844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,fp8,0,0.06091199815273285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.06163200239340464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.05919999877611796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,fp8,0,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,64,128,1,fp8,fp8,0,0.055904000997543335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.05858133236567179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,float16,0,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,fp8,0,0.059562668204307556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,64,128,1,fp8,fp8,0,0.05755199988683065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,float16,0,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,64,128,1,fp8,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.06010133524735769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,64,128,1,fp8,fp8,0,0.03869866579771042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,float16,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,fp8,0,0.0391146664818128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,64,128,1,fp8,fp8,0,0.03659733384847641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,float16,0,0.0393653338154157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,fp8,0,0.038949333131313324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,float16,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,64,128,1,fp8,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,fp8,0,0.02610666553179423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,64,128,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.026746665438016255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,float16,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,fp8,0,0.025765334566434223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,float16,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.025968000292778015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,64,128,1,fp8,fp8,0,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,float16,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,float16,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,64,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,float16,0,0.01647466669480006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,64,128,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,float16,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,64,128,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.08040533463160197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,float16,0,0.07860800127188365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,fp8,0,0.08030400176843007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,64,128,1,fp8,fp8,0,0.07454399764537811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.07966933151086171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.0746559997399648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,64,128,1,fp8,fp8,0,0.07532266775767009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,float16,0,0.07975466549396515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.08043733239173889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,fp8,0,0.07901333272457123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.08045866588751475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.07422400017579396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,float16,0,0.08226666847864787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.08028266827265422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,fp8,0,0.08027199904123943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,64,128,1,fp8,fp8,0,0.0769760012626648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.08036266764005025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,float16,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.07687999804814656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,64,128,1,fp8,fp8,0,0.04751466711362203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.04749333361784617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,float16,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,64,128,1,fp8,fp8,0,0.043663998444875084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.04560533165931702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,float16,0,0.047584002216657005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.04842133323351542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,fp8,0,0.04692266881465912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,64,128,1,fp8,fp8,0,0.045498669147491455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,64,128,1,fp8,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,float16,0,0.04808000226815542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,fp8,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,float16,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,float16,0,0.031109333038330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,64,128,1,fp8,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,64,128,1,fp8,fp8,0,0.02977599948644638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.030293333033720653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,float16,0,0.03108799954255422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,64,128,1,fp8,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,64,128,1,fp8,fp8,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.031157332162062328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,64,128,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,64,128,1,fp8,fp8,0,0.020741333564122517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,64,128,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,float16,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,64,128,1,fp8,fp8,0,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,fp8,0,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,64,128,1,fp8,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,64,128,1,fp8,fp8,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,float16,0,0.0681333343187968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.06634666522343953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,fp8,0,0.068122665087382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,64,128,1,fp8,fp8,0,0.06368533273537953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.0641599992911021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,float16,0,0.06839466591676076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.06820266445477803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,64,128,1,fp8,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.06815466781457265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.06386666496594746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,float16,0,0.06814933319886525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.06850133339564006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,fp8,0,0.06680533289909363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,64,128,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,64,128,1,fp8,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.06500266492366791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,float16,0,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,float16,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,64,128,1,fp8,fp8,0,0.03719466676314672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.03789866715669632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,float16,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,64,128,1,fp8,fp8,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,float16,0,0.04018666595220566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.04002666721741358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,64,128,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,float16,0,0.02697066714366277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,float16,0,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.026880001028378803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,float16,0,0.025920001169045765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,fp8,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,64,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,64,128,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.026602665583292644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,fp8,0,0.020197333147128422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,64,128,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,64,128,1,fp8,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,float16,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,float16,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,float16,0,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,64,128,1,fp8,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,64,128,1,fp8,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,float16,0,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.05923733115196228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,fp8,0,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,64,128,1,fp8,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.058789332707722984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,float16,0,0.059861332178115845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.05717866619427999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,fp8,0,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,64,128,1,fp8,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.06012799839178721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,float16,0,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.05997333427270254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,64,128,1,fp8,fp8,0,0.05606933434804281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,64,128,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,float16,0,0.035946667194366455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,64,128,1,fp8,fp8,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,float16,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,64,128,1,fp8,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,64,128,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.03436266630887985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,float16,0,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,fp8,0,0.03600533306598663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,64,128,1,fp8,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.02493866781393687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,64,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,float16,0,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,64,128,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,64,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,64,128,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,64,128,1,fp8,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,64,128,1,fp8,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,float16,0,0.24898666143417358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,fp8,0,0.25065066417058307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,64,128,1,fp8,fp8,0,0.23704000314076742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,float16,0,0.2609226703643799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,fp8,0,0.2653119961420695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,1.5467252731323242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,64,128,1,fp8,fp8,0,0.25011199712753296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,1.4372159639994304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,1.555557409922282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,float16,0,0.15002133448918661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,fp8,0,0.15270933508872986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,1.5681813557942708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,64,128,1,fp8,fp8,0,0.14758933583895364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,1.566256046295166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,0.8605013688405355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,1.451034704844157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,0.8653706709543864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,float16,0,0.13191999991734824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,fp8,0,0.13368533054987589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,0.8024426301320394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,64,128,1,fp8,fp8,0,0.12973866860071817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,0.8438666661580404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,float16,0,0.13806399703025818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,fp8,0,0.13980799913406372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,0.8425760269165039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,0.8464213212331136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,64,128,1,fp8,fp8,0,0.1357973317305247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,0.7819626331329346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,float16,0,0.08710933725039165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,0.8502293427785238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,fp8,0,0.0897706647713979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,0.7918559710184733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,64,128,1,fp8,fp8,0,0.08996267120043437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,0.46489067872365314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,0.4947093327840169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,float16,0,0.0809333324432373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,0.5024693409601847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,fp8,0,0.08277866741021474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,0.4921120007832845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,64,128,1,fp8,fp8,0,0.07905599971612294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,fp8,0,0.08463999629020691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,float16,0,0.08271466692288716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,0.4899040063222249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,0.4549386501312256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,0.4946826696395874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,64,128,1,fp8,fp8,0,0.08074666559696198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,float16,0,0.0633546660343806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,0.4944106737772624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,0.45953067143758136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,0.3282453417778015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.32450666030248004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,64,128,1,fp8,fp8,0,0.06030400097370148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,float16,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,0.3017866611480713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,fp8,0,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,float16,0,0.06264000137646993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.3275573253631592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,64,128,1,fp8,fp8,0,0.06178666651248932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.32548266649246216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,0.30189865827560425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,fp8,0,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.3274986743927002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,float16,0,0.19131199518839517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,64,128,1,fp8,fp8,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.3249066670735677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,0.3025226593017578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,fp8,0,0.1937440037727356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,64,128,1,fp8,fp8,0,0.18313600619633993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,0.9327946503957113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,float16,0,0.1991200049718221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,0.9381706714630127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,fp8,0,0.20277865727742514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,0.8719786802927653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,64,128,1,fp8,fp8,0,0.19301333030064902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,float16,0,0.11828800042470296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,0.9471093018849691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,fp8,0,0.12146666646003723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,64,128,1,fp8,fp8,0,0.11713600158691406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,0.9454452991485596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,0.5324480136235555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,float16,0,0.10634666681289673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,0.879466692606608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,0.5353279908498129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,0.4997119903564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,0.5171626806259155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,64,128,1,fp8,fp8,0,0.10088533163070679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,float16,0,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,0.5219626824061075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,0.48099732398986816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,fp8,0,0.11184533437093098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,0.5216746727625529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,0.49109868208567303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,64,128,1,fp8,fp8,0,0.10724799831708272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,float16,0,0.06840000053246816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,0.5234026511510214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,0.3163733283678691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,fp8,0,0.07052266597747803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,64,128,1,fp8,fp8,0,0.06975999971230824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,0.31817599137624103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,float16,0,0.06527466575304668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,0.2958453297615051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,fp8,0,0.06423999865849812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,0.3139466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,64,128,1,fp8,fp8,0,0.06266133487224579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,0.3128160039583842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,0.28934399286905926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,float16,0,0.06590400139490764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,0.31410666306813556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,64,128,1,fp8,fp8,0,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,0.3141813278198242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,float16,0,0.054287999868392944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,0.2935413320859273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.19874666134516397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,fp8,0,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.2115573287010193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,64,128,1,fp8,fp8,0,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.21448000272115073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,float16,0,0.053930665055910744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,float16,0,0.054485330979029335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.21335999170939127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,64,128,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.2154560089111328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.19779733816782633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.2135039965311686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,float16,0,0.15782399972279867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,fp8,0,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,64,128,1,fp8,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.21303466955820718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.19925866524378458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,fp8,0,0.16062933206558228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,64,128,1,fp8,fp8,0,0.15220800042152405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,0.6868639787038168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,float16,0,0.1646880010763804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,64,128,1,fp8,fp8,0,0.16029333074887595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,0.6864266395568848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,0.639792005221049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,fp8,0,0.16691199938456217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,0.6929492950439453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,0.6904799938201904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,float16,0,0.09962667028109233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,fp8,0,0.10259200135866801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,0.6485226551691691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,0.39503999551137287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,64,128,1,fp8,fp8,0,0.10043199857076009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,float16,0,0.08961066603660583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,0.3999093373616536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,0.37305601437886554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,fp8,0,0.09278399745623271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,0.3880853255589803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,64,128,1,fp8,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,0.3893386522928874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,0.38811735312143963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,float16,0,0.09286933143933614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,float16,0,0.06232533355553945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,0.35946134726206463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,fp8,0,0.09473066528638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,64,128,1,fp8,fp8,0,0.06211199859778086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,64,128,1,fp8,fp8,0,0.09134399890899658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,0.3930986722310384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,0.36425598462422687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,fp8,0,0.06410133341948192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,fp8,0,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.245578666528066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.24790932734807333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,0.2308746576309204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,float16,0,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.24344533681869507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,64,128,1,fp8,fp8,0,0.057775999108950295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.24448533852895102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,0.22614399592081705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.24545600016911825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,float16,0,0.06132266422112783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.24521599213282266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,64,128,1,fp8,fp8,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,0.22968000173568726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,float16,0,0.04972266654173533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.16034666697184244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,64,128,1,fp8,fp8,0,0.04726399978001913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,fp8,0,0.04923200110594431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.16030933459599814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.1479200025399526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,float16,0,0.049466664592425026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.15933332840601602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,64,128,1,fp8,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.15995200475056967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.14825600385665894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,float16,0,0.049685334165891014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.15002133448918661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.15851199626922607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,64,128,1,fp8,fp8,0,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,64,128,1,fp8,fp8,0,0.231989324092865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.15966399510701498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,float16,0,0.24279999732971191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,fp8,0,0.24652800957361856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,0.8776853084564209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,float16,0,0.25623466571172077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,0.8814773559570312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,0.8162986437479655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,fp8,0,0.25945599873860675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,float16,0,0.14381866653760275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,64,128,1,fp8,fp8,0,0.24596800406773886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,0.8883732954661051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,0.8948480288187662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,fp8,0,0.14708800117174783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,0.8313759962717692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,0.4915733337402344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,64,128,1,fp8,fp8,0,0.14190399646759033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,0.49542399247487384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,float16,0,0.12573333581288657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,0.4607359965642293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,fp8,0,0.12753066420555115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,0.47377065817515057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,64,128,1,fp8,fp8,0,0.12266133228937785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,0.47596800327301025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,0.4738239844640096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,float16,0,0.13102933764457703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,0.44470401604970294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,fp8,0,0.13410666584968567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,64,128,1,fp8,fp8,0,0.1297653317451477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,float16,0,0.08061333497365315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,0.4816746711730957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,0.44893864790598553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.28064533074696857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,fp8,0,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,64,128,1,fp8,fp8,0,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.28409600257873535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,0.26710933446884155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.2760533293088277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,float16,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,fp8,0,0.07506133119265239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.27525333563486737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,64,128,1,fp8,fp8,0,0.07039999961853027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,0.2564586599667867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,float16,0,0.07625600198904674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,fp8,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.2751200000445048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,64,128,1,fp8,fp8,0,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.2768373290697734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,0.25758934020996094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,float16,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.1808799902598063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,fp8,0,0.05159999926884969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,64,128,1,fp8,fp8,0,0.049738665421803795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,64,128,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.18142932653427124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.16889599959055582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,float16,0,0.04776533444722494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.17884800831476846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,fp8,0,0.04886400202910105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.17966399590174356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,float16,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.166810671488444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.1790133317311605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,fp8,0,0.0499839981396993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,64,128,1,fp8,fp8,0,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.18036266167958578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.16714666287104288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,float16,0,0.037952000896135964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.12240533034006755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,fp8,0,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.12337600191434224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,64,128,1,fp8,fp8,0,0.0373279998699824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.12344533205032349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,64,128,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.11545067032178243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,float16,0,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,fp8,0,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.1222826639811198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,64,128,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.11507733662923177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,float16,0,0.037808001041412354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.12169599533081055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,float16,0,0.18886399269104004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,fp8,0,0.03808533400297165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.12337066729863484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.11456533273061116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,fp8,0,0.1906399925549825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,0.5447839895884196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,0.5089600086212158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,64,128,1,fp8,fp8,0,0.18019733826319376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,float16,0,0.1977120041847229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,0.5484586556752523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,fp8,0,0.19914666811625162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,64,128,1,fp8,fp8,0,0.19125866889953613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,0.5196693340937296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,0.5564479827880859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,float16,0,0.1135040024916331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,0.5589866638183594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,0.3134400049845378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,fp8,0,0.11515733599662781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,64,128,1,fp8,fp8,0,0.11310399572054546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,0.31413867076237995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,0.2953919967015584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,float16,0,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,0.29975465933481854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,fp8,0,0.10228266318639119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,64,128,1,fp8,fp8,0,0.096261332432429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,0.2998080054918925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,0.27773332595825195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,float16,0,0.10453866918881734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,fp8,0,0.1069546639919281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,0.3015519976615906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,64,128,1,fp8,fp8,0,0.10327466328938802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,64,128,1,fp8,fp8,0,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,0.30542399485905963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,0.2874240080515544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.1725920041402181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,float16,0,0.06389333307743073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.18145066499710083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,fp8,0,0.06604266663392384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.18589866161346436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,float16,0,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.17998399337132773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,fp8,0,0.06058133145173391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,64,128,1,fp8,fp8,0,0.05653866628805796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.18252267440160116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.16673600673675537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,float16,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.1811093290646871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,fp8,0,0.0622026671965917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,64,128,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.18330667416254678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.1688106656074524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,float16,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.12142399946848552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.1197653313477834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,fp8,0,0.04550399879614512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,64,128,1,fp8,fp8,0,0.04345599810282389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.12291733423868816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.11338667074839275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,fp8,0,0.04250133534272512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,64,128,1,fp8,fp8,0,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.12061867117881775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.11107732852300008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,float16,0,0.042725334564844765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.12141333023707072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,fp8,0,0.0441599984963735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,64,128,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.12026133139928182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.11204800009727478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.09479999542236328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,64,128,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.09691199660301208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,float16,0,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.09512533744176228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.09676266709963481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.08984532952308655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,float16,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.09500799576441447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,64,128,1,fp8,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,fp8,0,0.24887466430664062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,0.5446346600850424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,64,128,1,fp8,fp8,0,0.23539199431737265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.08922132849693298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,float16,0,0.247381329536438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,0.5485333204269409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,float16,0,0.2616426746050517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,0.5079466501871744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,fp8,0,0.2627360026041667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,0.5594880183537801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,64,128,1,fp8,fp8,0,0.24914133548736572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,0.5215466817220052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,float16,0,0.14406399925549826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,0.5591093301773071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,0.3083359996477763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,fp8,0,0.14669866363207498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,64,128,1,fp8,fp8,0,0.1418293317159017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,0.31069332361221313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,0.29233600695927936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,float16,0,0.1237386663754781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.28779200712839764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,fp8,0,0.1256053348382314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,64,128,1,fp8,fp8,0,0.12357333302497864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.2903199990590413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,0.2712799906730652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,float16,0,0.13148799538612366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.29365332921346027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,fp8,0,0.1325386663277944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,64,128,1,fp8,fp8,0,0.12982933719952902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.29818665981292725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,0.27904532353083294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,float16,0,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.17064533631006876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,fp8,0,0.08031466603279114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,64,128,1,fp8,fp8,0,0.07881066699822743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.17490132649739584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.16647467017173767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,float16,0,0.07077333331108093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,float16,0,0.07284800211588542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.16477866967519125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,64,128,1,fp8,fp8,0,0.06808533271153767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,fp8,0,0.07229866584142049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.16683733463287354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.16871466239293417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.1546933352947235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.1675893266995748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,fp8,0,0.0745119998852412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,64,128,1,fp8,fp8,0,0.07039466500282288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.1579253375530243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,float16,0,0.047594666481018066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.10932266712188721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,64,128,1,fp8,fp8,0,0.04690133531888326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.11133333047231038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.10322667161623637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.10928533474604289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,float16,0,0.04459733267625173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,64,128,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.10060800115267436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,float16,0,0.04532266656557719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,fp8,0,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,64,128,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.10929066936175029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.07246933380762736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.06807466844717662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.07257066667079926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.0703413337469101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,float16,0,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.07049066821734111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,64,128,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.06623466809590657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.06842133402824402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,64,128,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.07237866520881653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.06779199838638306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.06829866766929626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,64,128,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,float16,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.06841066479682922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.06851199766000111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,64,128,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.06830933193365733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,float16,0,0.19034665822982788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,0.34984532992045086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,fp8,0,0.19271467129389444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,0.33140265941619873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,64,128,1,fp8,fp8,0,0.18200532595316568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,0.35337066650390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,float16,0,0.19921600818634033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,0.35884801546732586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,fp8,0,0.19944000244140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,64,128,1,fp8,fp8,0,0.1914773384730021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,0.36108799775441486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,float16,0,0.11310399572054546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,64,128,1,fp8,fp8,0,0.11269332965215047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,0.34059735139211017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.20244266589482626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,fp8,0,0.11619200309117635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.20601065953572592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.19522666931152344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,float16,0,0.09989866614341736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.1881706714630127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,fp8,0,0.10146133104960124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,64,128,1,fp8,fp8,0,0.09476266304651897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.189082662264506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.1774453322092692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,float16,0,0.1032373309135437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.1937546730041504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.18454933166503906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.19127466281255087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,fp8,0,0.10629866520563762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,64,128,1,fp8,fp8,0,0.10314133763313293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,float16,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.11744532982508342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,fp8,0,0.06385600070158641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,64,128,1,fp8,fp8,0,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,fp8,0,0.058464000622431435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.11902399857838948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.11107732852300008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,float16,0,0.05779733260472616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.11245866616566975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,64,128,1,fp8,fp8,0,0.05585599939028422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.11316800117492676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.105295995871226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,float16,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,fp8,0,0.060090666015942894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.11402666568756104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,64,128,1,fp8,fp8,0,0.05632533133029938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.11596799890200298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.10763733585675557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.07643733421961467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,64,128,1,fp8,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.07025599976380666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.07322666545708974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,float16,0,0.04083200047413508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.07518399755160014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.07662400106589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,64,128,1,fp8,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.07595733304818471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,float16,0,0.04033066580692927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,fp8,0,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.0705973356962204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,fp8,0,0.028410665690898895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.052426666021347046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.05539200206597646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,float16,0,0.02779199928045273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,64,128,1,fp8,fp8,0,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.05808533231417338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.05786666770776113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,64,128,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.054570664962132774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,64,128,1,fp8,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,64,128,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,64,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.055770665407180786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,0.3741813500722249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.051781331499417625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,float16,0,0.2500693400700887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,fp8,0,0.2507306734720866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,64,128,1,fp8,fp8,0,0.23499733209609985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,0.3744800090789795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,0.3518986701965332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,float16,0,0.2653653422991435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,0.386186679204305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,fp8,0,0.26466667652130127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,64,128,1,fp8,fp8,0,0.24833067258199057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,0.38679468631744385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,float16,0,0.14406933387120566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,0.36572265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.21535466114679971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,0.20768000682195029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,fp8,0,0.14667733510335287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,64,128,1,fp8,fp8,0,0.14385599891344705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.21621867020924887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,float16,0,0.12567999958992004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.19502933820088705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,fp8,0,0.1281599998474121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,64,128,1,fp8,fp8,0,0.12474133570988973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.1973386605580648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.18750399351119995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,float16,0,0.13119999567667642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.20438933372497559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.19513599077860513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.20329066117604574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,fp8,0,0.13591999808947244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,64,128,1,fp8,fp8,0,0.13330666224161783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,float16,0,0.07720533510049184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.11661332845687866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,fp8,0,0.08039466540018718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,64,128,1,fp8,fp8,0,0.07839466631412506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.12065066893895467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.11527466773986816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,float16,0,0.0715946654478709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.11241066455841064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,fp8,0,0.07420800129572551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,64,128,1,fp8,fp8,0,0.06823466718196869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.11211733023325603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.10424000024795532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,float16,0,0.07283733288447063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.114138662815094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,fp8,0,0.07484266658624013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,64,128,1,fp8,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,64,128,1,fp8,fp8,0,0.04558399816354116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.11539733409881592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.1074186662832896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,float16,0,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.07266666491826375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,fp8,0,0.046538665890693665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.07217066486676534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.07029333213965099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,float16,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.07037333150704701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,fp8,0,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.0722453345855077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,fp8,0,0.04446400205294291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,64,128,1,fp8,fp8,0,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.06665599842866261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,float16,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,64,128,1,fp8,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.07230933507283528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.06768533090750377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,float16,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,64,128,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,64,128,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.04762666424115499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,fp8,0,0.02995733420054118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.04775999983151754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,64,128,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.04081066697835922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.04242666562398275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.041264000038305916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,64,128,1,fp8,fp8,0,0.024346667031447094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.03841600070397059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,64,128,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.041082667807737984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,64,128,1,fp8,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,64,128,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,float16,0,0.20860799153645834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.27006399631500244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,fp8,0,0.21089067061742148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,64,128,1,fp8,fp8,0,0.20034666856129965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.2717866698900859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.25858134031295776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,float16,0,0.2193066676457723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.28108266989390057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,fp8,0,0.21823465824127197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,64,128,1,fp8,fp8,0,0.2101973295211792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.2789493401845296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,float16,0,0.1220746636390686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,0.26759999990463257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.15755200386047363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,fp8,0,0.12366400162378947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,64,128,1,fp8,fp8,0,0.12229866782824199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.15825066963831583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.15505066514015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,float16,0,0.10363733768463135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.1379680037498474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,fp8,0,0.10659733414649963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,64,128,1,fp8,fp8,0,0.09982400139172871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.14131200313568115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.13310933113098145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,float16,0,0.10744000474611919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.14358400305112204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,fp8,0,0.11089600125948589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,64,128,1,fp8,fp8,0,0.10725866754849751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,64,128,1,fp8,fp8,0,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.14697066942850748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.08227199812730153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.1400266687075297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,float16,0,0.06459199885527293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,fp8,0,0.06597333153088887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,float16,0,0.060047999024391174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.0823466678460439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,fp8,0,0.061333333452542625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,64,128,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.08273600041866302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.07658133407433827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,float16,0,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.08345599969228108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,fp8,0,0.06300800045331319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,64,128,1,fp8,fp8,0,0.05909866591294607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.057434668143590294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.08478933572769165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,float16,0,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.055045331517855324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,fp8,0,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,64,128,1,fp8,fp8,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.05304533243179321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,64,128,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,64,128,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.05020800232887268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.03860266755024592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,float16,0,0.029098667204380035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,64,128,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,float16,0,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,64,128,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.03676266719897588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,float16,0,0.0245919997493426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,64,128,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,float16,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,64,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,64,128,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.034602666894594826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,float16,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,fp8,0,0.022410665949185688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.033904001116752625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,64,128,1,fp8,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,64,128,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,fp8,0,0.02197333425283432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,float16,0,0.22165866692860922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.2616479992866516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,fp8,0,0.22160534063975015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,64,128,1,fp8,fp8,0,0.21043733755747476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,fp8,0,0.22341332832972208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.26050132513046265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.24674133459726968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,float16,0,0.22672533988952637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.2640373309453328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,64,128,1,fp8,fp8,0,0.21970667441685995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.2632960081100464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,float16,0,0.12367467085520427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.14748799800872803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,0.2585973342259725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.14620799819628397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,fp8,0,0.12369599938392639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,64,128,1,fp8,fp8,0,0.12402133146921794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.14404267072677612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,float16,0,0.11662399768829346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.1381280024846395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,fp8,0,0.11553600430488586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,64,128,1,fp8,fp8,0,0.11122133334477742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.13642133275667825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.1322773297627767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,float16,0,0.11956800023714702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.1404853363831838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,fp8,0,0.11775466799736023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,64,128,1,fp8,fp8,0,0.11761066317558289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,64,128,1,fp8,fp8,0,0.067930668592453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.1409386694431305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.13818666338920593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,float16,0,0.06422933439413707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,float16,0,0.06759466727574666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.07832533121109009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,fp8,0,0.06414400041103363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,64,128,1,fp8,fp8,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.08054933448632558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.08054399987061818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.07804266611735027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.07415999968846639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,float16,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,float16,0,0.06613866488138835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.07860800127188365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,64,128,1,fp8,fp8,0,0.06325866778691609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.05073066552480062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.07426133255163829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.05215999980767568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,64,128,1,fp8,fp8,0,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.05183466772238413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,fp8,0,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.051327998439470925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,64,128,1,fp8,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.05195199946562449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,float16,0,0.04119999955097834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,64,128,1,fp8,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,64,128,1,fp8,fp8,0,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,float16,0,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,64,128,1,fp8,fp8,0,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,float16,0,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,64,128,1,fp8,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,float16,0,0.022143999735514324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,64,128,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,fp8,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,64,128,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.02604266752799352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,fp8,0,0.01989866668979327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,fp8,0,0.020634666085243225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,64,128,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,float16,0,0.2172586719195048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.21858133872350058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.21993066867192587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,fp8,0,0.21453332901000977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,float16,0,0.22056533892949423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,64,128,1,fp8,fp8,0,0.20332266887029013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.209007998307546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.2244053284327189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,fp8,0,0.21896000703175864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,64,128,1,fp8,fp8,0,0.21524266401926676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.22181334098180136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,float16,0,0.12195733189582825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.12296000123023987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.2178879976272583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.12474667032559712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,fp8,0,0.1197119951248169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,fp8,0,0.11310933033625285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,64,128,1,fp8,fp8,0,0.12140799562136333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.12199466427167256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,float16,0,0.11245333154996236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.11661332845687866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,64,128,1,fp8,fp8,0,0.10941333572069804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.11556800206502278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.11014399925867717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,float16,0,0.11762133240699768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.116565336783727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,fp8,0,0.11412800351778667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,64,128,1,fp8,fp8,0,0.11493333180745442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.11601066589355469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.11731200416882832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,float16,0,0.06658666829268138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.06671999891599019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,fp8,0,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,64,128,1,fp8,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.06611200173695882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,64,128,1,fp8,fp8,0,0.06052266558011373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.06633600095907848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,float16,0,0.06374399860699971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.06449066599210103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,fp8,0,0.06407999992370605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.061018665631612144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,float16,0,0.06420266628265381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,fp8,0,0.0647680014371872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,64,128,1,fp8,fp8,0,0.06066133578618368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.06563200056552887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,float16,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,64,128,1,fp8,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,float16,0,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.045312002301216125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.041696002086003624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.04240000247955322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,fp8,0,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,64,128,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,64,128,1,fp8,fp8,0,0.02682666728893916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,64,128,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,64,128,1,fp8,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,fp8,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,64,128,1,fp8,fp8,0,0.02205866575241089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,float16,0,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,fp8,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,64,128,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,64,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,float16,0,0.020341333001852036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,64,128,1,fp8,fp8,0,0.09880000352859497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,float16,0,0.10227200388908386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.09537600477536519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.10082133611043294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,fp8,0,0.1011253297328949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,64,128,1,fp8,fp8,0,0.10332799951235454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.09964266419410706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,float16,0,0.10453333457310994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.10314133763313293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,fp8,0,0.10316800077756245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.10114666819572449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.10318400462468465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,float16,0,0.06205333272616068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,fp8,0,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,64,128,1,fp8,fp8,0,0.06188266475995382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.059792002042134605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,float16,0,0.059434667229652405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.05619733532269796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,fp8,0,0.05890133480230967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,64,128,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.057162667314211525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,float16,0,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,fp8,0,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,64,128,1,fp8,fp8,0,0.05774400134881338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.05644799768924713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,float16,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,float16,0,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,float16,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,64,128,1,fp8,fp8,0,0.036992001036802925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.03602133442958196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,64,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.03638399889071783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,64,128,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,64,128,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,64,128,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,64,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,fp8,0,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.059674665331840515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,float16,0,0.059215997656186424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,float16,0,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,fp8,0,0.05959466596444448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,64,128,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.05793066819508871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.06006399790445963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,64,128,1,fp8,fp8,0,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.05937066674232483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.05898133416970571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,float16,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.0386559988061587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,64,128,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.036490666369597115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,float16,0,0.03934400031963984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.03857066730658213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.025946666797002155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,fp8,0,0.02626666675011317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,64,128,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.026789332429567974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.039306665460268654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,64,128,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,64,128,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.018197332819302876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,64,128,1,fp8,fp8,0,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,64,128,1,fp8,fp8,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,64,128,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,fp8,0,0.04749333361784617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,float16,0,0.04811733464399973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,64,128,1,fp8,fp8,0,0.045552000403404236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.04533333579699198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,float16,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,float16,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,64,128,1,fp8,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,64,128,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.030218665798505146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,float16,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,float16,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,float16,0,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,64,128,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,float16,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,64,128,1,fp8,fp8,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,float16,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,64,128,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,float16,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,fp8,0,0.04036800066630045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,64,128,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,float16,0,0.0406986673672994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.04011733333269755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,fp8,0,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,64,128,1,fp8,fp8,0,0.03851733356714249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,float16,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,64,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.026746665438016255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,64,128,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,64,128,1,fp8,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,fp8,0,0.020261333634455998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,64,128,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,64,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,64,128,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,64,128,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,fp8,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,float16,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,float16,0,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,fp8,0,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,64,128,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.03674133370320002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,float16,0,0.03589333345492681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,64,128,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.03721066564321518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,64,128,1,fp8,fp8,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.02589333305756251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,64,128,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,64,128,1,fp8,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,float16,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,64,128,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,64,128,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,64,128,1,fp8,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,64,128,1,fp8,fp8,0,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,64,128,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,64,128,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,float16,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,float16,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,64,128,1,fp8,fp8,0,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,float16,0,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,64,128,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,float16,0,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,float16,0,0.13395733634630838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,fp8,0,0.13586666186650595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,64,128,1,fp8,fp8,0,0.13153066237767538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,float16,0,0.8398613135019938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,float16,0,0.08241599798202515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,fp8,0,0.08433066805203755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,fp8,0,0.8460319836934408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,64,0,1,fp8,fp8,0,0.7862933476765951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,64,128,1,fp8,fp8,0,0.0835040012995402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,float16,0,0.49029866854349774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,float16,0,0.07732800145943959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,fp8,0,0.49265599250793457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,64,0,1,fp8,fp8,0,0.4620159864425659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,fp8,0,0.07974400122960408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,64,128,1,fp8,fp8,0,0.07553599774837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,float16,0,0.48955734570821124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,float16,0,0.050010666251182556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,fp8,0,0.488042672475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,64,0,1,fp8,fp8,0,0.450981338818868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,fp8,0,0.3158773382504781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,float16,0,0.3153226574261983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,64,128,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,float16,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,64,0,1,fp8,fp8,0,0.2934559981028239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,fp8,0,0.05162133276462555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,float16,0,0.31174933910369873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,64,128,1,fp8,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,fp8,0,0.3141973416010539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,float16,0,0.03843733419974645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,64,0,1,fp8,fp8,0,0.29045865933100384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,float16,0,0.2117919921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,64,128,1,fp8,fp8,0,0.037791999677817024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,fp8,0,0.21242666244506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,64,0,1,fp8,fp8,0,0.19643733898798624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,float16,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,float16,0,0.21170665820439658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,64,128,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,fp8,0,0.21240532398223877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,64,0,1,fp8,fp8,0,0.19755200544993082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,float16,0,0.10726400216420491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,fp8,0,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,64,128,1,fp8,fp8,0,0.10486933588981628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,float16,0,0.5189013481140137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,float16,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,fp8,0,0.5209066470464071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,64,0,1,fp8,fp8,0,0.4854666789372762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,fp8,0,0.06598933537801106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,float16,0,0.3111413319905599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,64,128,1,fp8,fp8,0,0.06403199831644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,fp8,0,0.31491732597351074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,float16,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,64,0,1,fp8,fp8,0,0.29259200890858966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,fp8,0,0.3097013235092163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,64,0,1,fp8,fp8,0,0.2869066596031189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,fp8,0,0.06224533418814341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,float16,0,0.3096746603647868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,64,128,1,fp8,fp8,0,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,float16,0,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,64,0,1,fp8,fp8,0,0.1895093321800232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,float16,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,float16,0,0.20388799905776978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,float16,0,0.20451732476552328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,64,128,1,fp8,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,fp8,0,0.20333333810170492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,64,128,1,fp8,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,fp8,0,0.2032960057258606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,64,0,1,fp8,fp8,0,0.18902933597564697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,fp8,0,0.16185067097345987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,float16,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,float16,0,0.16217600305875143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,64,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,64,0,1,fp8,fp8,0,0.1524853308995565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,float16,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,float16,0,0.16269866625467935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,64,128,1,fp8,fp8,0,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,fp8,0,0.09136000275611877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,fp8,0,0.1623093287150065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,64,0,1,fp8,fp8,0,0.1509173313776652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,float16,0,0.09014933307965596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,float16,0,0.38685333728790283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,64,128,1,fp8,fp8,0,0.08822400371233623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,fp8,0,0.3901280164718628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,float16,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,64,0,1,fp8,fp8,0,0.3616960048675537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,float16,0,0.2433919906616211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,fp8,0,0.0613919993241628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,64,128,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,float16,0,0.2432266672452291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,fp8,0,0.24450665712356567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,64,0,1,fp8,fp8,0,0.22632000843683878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,float16,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,64,128,1,fp8,fp8,0,0.05538133283456167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,fp8,0,0.04353600243727366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,fp8,0,0.24138667186101279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,float16,0,0.04211199780305227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,64,0,1,fp8,fp8,0,0.22421866655349731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,float16,0,0.04043733328580856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,float16,0,0.15230400363604227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,64,128,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,fp8,0,0.1536799967288971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,64,0,1,fp8,fp8,0,0.14219199617703757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,float16,0,0.15172800421714783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,64,128,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,fp8,0,0.1513866682847341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,64,0,1,fp8,fp8,0,0.14180800318717957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,float16,0,0.13782933354377747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,64,128,1,fp8,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,fp8,0,0.13777599732081094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,64,0,1,fp8,fp8,0,0.1275200049082438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,64,0,1,fp8,fp8,0,0.12796266873677573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,float16,0,0.13767466942469278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,64,128,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,float16,0,0.48135467370351154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,fp8,0,0.13796266913414001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,float16,0,0.13268267114957175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,fp8,0,0.13368533054987589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,64,128,1,fp8,fp8,0,0.12946666280428568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,float16,0,0.07839466631412506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,fp8,0,0.48341866334279376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,64,0,1,fp8,fp8,0,0.4484959840774536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,float16,0,0.27957334121068317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,float16,0,0.07307733098665874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,fp8,0,0.08062933385372162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,64,128,1,fp8,fp8,0,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,fp8,0,0.282149334748586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,64,0,1,fp8,fp8,0,0.2635733286539714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,64,0,1,fp8,fp8,0,0.2569173375765483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,float16,0,0.2757440010706584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,64,128,1,fp8,fp8,0,0.07223466535409291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,fp8,0,0.27512532472610474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,float16,0,0.046682665745417275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,float16,0,0.17805333932240805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,64,128,1,fp8,fp8,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,fp8,0,0.18076266845067343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,64,0,1,fp8,fp8,0,0.1667840083440145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,float16,0,0.04549333453178406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,float16,0,0.17670400937398276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,64,128,1,fp8,fp8,0,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,fp8,0,0.17801066239674887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,64,0,1,fp8,fp8,0,0.16268799702326456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,float16,0,0.11547733346621196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,64,128,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,fp8,0,0.11687999963760376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,64,0,1,fp8,fp8,0,0.11084266503651936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,float16,0,0.11553600430488586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,64,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,fp8,0,0.11734933654467265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,64,0,1,fp8,fp8,0,0.1090880036354065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,float16,0,0.028160000840822857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,float16,0,0.11327466368675232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,64,128,1,fp8,fp8,0,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,fp8,0,0.11343466242154439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,64,0,1,fp8,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,64,0,1,fp8,fp8,0,0.10594666997591655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,float16,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,float16,0,0.10317867000897725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,float16,0,0.11308266719182332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,fp8,0,0.10706667105356853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,64,128,1,fp8,fp8,0,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,fp8,0,0.30718932549158734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,fp8,0,0.11312533418337505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,float16,0,0.3023200035095215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,64,128,1,fp8,fp8,0,0.10217600067456563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,float16,0,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,64,0,1,fp8,fp8,0,0.28543466329574585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,float16,0,0.18203200896581015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,64,0,1,fp8,fp8,0,0.170522669951121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,64,128,1,fp8,fp8,0,0.06162666777769724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,fp8,0,0.18412800629933676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,float16,0,0.0583840012550354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,fp8,0,0.18267732858657837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,float16,0,0.17869865894317627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,fp8,0,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,float16,0,0.1178986628850301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,64,128,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,64,0,1,fp8,fp8,0,0.16661866505940756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,float16,0,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,64,128,1,fp8,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,fp8,0,0.12110933661460876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,64,0,1,fp8,fp8,0,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,float16,0,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,float16,0,0.1185706655184428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,64,128,1,fp8,fp8,0,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,fp8,0,0.11936533451080322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,64,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,64,0,1,fp8,fp8,0,0.11121066411336263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,64,0,1,fp8,fp8,0,0.08658132950464885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,float16,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,float16,0,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,fp8,0,0.09075733025868733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,float16,0,0.09029866258303325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,64,128,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,64,0,1,fp8,fp8,0,0.08471999565760295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,fp8,0,0.09102933605511983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,float16,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,float16,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,float16,0,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,fp8,0,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,fp8,0,0.08854933579762776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,64,0,1,fp8,fp8,0,0.08267199993133545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,float16,0,0.08890133102734883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,64,128,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,64,0,1,fp8,fp8,0,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,float16,0,0.13503999511400858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,fp8,0,0.30053865909576416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,fp8,0,0.13804800311724344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,float16,0,0.2987946669260661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,64,128,1,fp8,fp8,0,0.1318880021572113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,64,0,1,fp8,fp8,0,0.2818079988161723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,float16,0,0.07925333579381307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,float16,0,0.17314134041468301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,fp8,0,0.08065066734949748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,64,128,1,fp8,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,fp8,0,0.17658666769663492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,64,0,1,fp8,fp8,0,0.1681706706682841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,fp8,0,0.17044800519943237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,64,0,1,fp8,fp8,0,0.1579039990901947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,float16,0,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,float16,0,0.16957332690556845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,64,128,1,fp8,fp8,0,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,float16,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,float16,0,0.10909333825111389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,fp8,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,64,128,1,fp8,fp8,0,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,64,128,1,fp8,fp8,0,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,fp8,0,0.1107360025246938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,64,0,1,fp8,fp8,0,0.10334933797518413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,float16,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,float16,0,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,fp8,0,0.045594667394955955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,fp8,0,0.10884799559911092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,64,0,1,fp8,fp8,0,0.10123200217882793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,64,0,1,fp8,fp8,0,0.06739200154940288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,float16,0,0.06955733398596446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,float16,0,0.07037333150704701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,fp8,0,0.07047466437021892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,64,128,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,fp8,0,0.0714026689529419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,float16,0,0.0660693347454071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,fp8,0,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,64,128,1,fp8,fp8,0,0.028223998844623566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,64,0,1,fp8,fp8,0,0.06565333406130473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,float16,0,0.026208000878492992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,float16,0,0.06566933294137318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,64,128,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,64,0,1,fp8,fp8,0,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,float16,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,float16,0,0.06451733410358429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,fp8,0,0.06576533118883769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,64,0,1,fp8,fp8,0,0.061834668119748436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,float16,0,0.02441066751877467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,fp8,0,0.0641599992911021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,64,0,1,fp8,fp8,0,0.059861332178115845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,fp8,0,0.06384533147017162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,float16,0,0.06438933312892914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,float16,0,0.11310933033625285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,fp8,0,0.11146666606267293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,64,0,1,fp8,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,float16,0,0.20151466131210327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,64,128,1,fp8,fp8,0,0.111653337876002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,fp8,0,0.2015893260637919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,float16,0,0.06471999982992808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,64,0,1,fp8,fp8,0,0.19357866048812866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,float16,0,0.11955199639002483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,fp8,0,0.06591466565926869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,fp8,0,0.06165333092212677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,64,128,1,fp8,fp8,0,0.06307733555634816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,fp8,0,0.12128000458081563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,64,0,1,fp8,fp8,0,0.1145919958750407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,float16,0,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,float16,0,0.11711466312408447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,64,128,1,fp8,fp8,0,0.058101331194241844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,fp8,0,0.11728533109029134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,64,0,1,fp8,fp8,0,0.1088746686776479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,float16,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,64,128,1,fp8,fp8,0,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,fp8,0,0.07964266836643219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,64,0,1,fp8,fp8,0,0.07322133580843608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,float16,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,float16,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,fp8,0,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,64,128,1,fp8,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,fp8,0,0.07623466849327087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,64,0,1,fp8,fp8,0,0.07162133355935414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,float16,0,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,fp8,0,0.029098667204380035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,64,128,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,64,0,1,fp8,fp8,0,0.05414933462937673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,64,0,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,float16,0,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,64,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,float16,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,64,128,1,fp8,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,fp8,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,64,0,1,fp8,fp8,0,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,float16,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,64,0,1,fp8,fp8,0,0.049882665276527405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,float16,0,0.05199466645717621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,fp8,0,0.05272000034650167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,64,0,1,fp8,fp8,0,0.048858667413393654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,float16,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,float16,0,0.20342934131622314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,64,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,fp8,0,0.05161599814891815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,64,0,1,fp8,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,float16,0,0.13497066497802734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,fp8,0,0.1367039978504181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,64,128,1,fp8,fp8,0,0.12939733266830444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,fp8,0,0.20668266216913858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,64,0,1,fp8,fp8,0,0.1925706664721171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,float16,0,0.07747733096281688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,float16,0,0.1174773375193278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,fp8,0,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,64,128,1,fp8,fp8,0,0.07826666533946991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,fp8,0,0.12132799625396729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,64,0,1,fp8,fp8,0,0.1144480009873708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,float16,0,0.07227199772993724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,float16,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,float16,0,0.11337066690127055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,fp8,0,0.07625066737333934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,64,128,1,fp8,fp8,0,0.07216000060240428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,64,128,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,fp8,0,0.11513066291809082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,64,0,1,fp8,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,float16,0,0.07216533521811168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,fp8,0,0.04706666866938273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,fp8,0,0.07450133562088013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,64,0,1,fp8,fp8,0,0.07085866729418437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,float16,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,float16,0,0.07250133156776428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,64,128,1,fp8,fp8,0,0.04354666670163473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,fp8,0,0.07217066486676534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,64,0,1,fp8,fp8,0,0.06798399984836578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,64,0,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,float16,0,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,64,128,1,fp8,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,fp8,0,0.04826133449872335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,fp8,0,0.047594666481018066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,float16,0,0.030746666093667347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,float16,0,0.04655999938646952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,fp8,0,0.030559999247392017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,64,128,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,64,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,float16,0,0.04264000058174133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,64,0,1,fp8,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,64,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,fp8,0,0.04223466912905375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,64,0,1,fp8,fp8,0,0.038618666430314384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,64,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,float16,0,0.04010133445262909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,64,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,float16,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,64,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,64,128,1,fp8,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,64,0,1,fp8,fp8,0,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,float16,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,float16,0,0.1467466652393341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,64,0,1,fp8,fp8,0,0.03621333340803782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,float16,0,0.10939199725786845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,float16,0,0.08608000477155049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,fp8,0,0.11317333579063416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,64,128,1,fp8,fp8,0,0.10621333122253418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,fp8,0,0.1479520003000895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,64,0,1,fp8,fp8,0,0.1418506701787313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,float16,0,0.0639466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,fp8,0,0.06736533343791962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,64,128,1,fp8,fp8,0,0.06339733302593231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,fp8,0,0.08705600102742513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,64,0,1,fp8,fp8,0,0.08459200461705525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,float16,0,0.06060799956321716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,float16,0,0.08353066444396973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,fp8,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,64,128,1,fp8,fp8,0,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,64,128,1,fp8,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,fp8,0,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,fp8,0,0.08453866839408875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,64,0,1,fp8,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,float16,0,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,float16,0,0.05622933308283488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,64,128,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,fp8,0,0.04358399907747904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,64,0,1,fp8,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,float16,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,fp8,0,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,64,0,1,fp8,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,float16,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,fp8,0,0.028901333610216778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,64,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,64,0,1,fp8,fp8,0,0.03806933263937632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,float16,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,64,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,float16,0,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,float16,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,64,0,1,fp8,fp8,0,0.03391999999682108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,float16,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,64,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,64,0,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,float16,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,float16,0,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,float16,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,64,128,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,fp8,0,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,64,0,1,fp8,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,64,0,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,fp8,0,0.022330666581789654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,float16,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,float16,0,0.031712000568707786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,64,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,float16,0,0.1384213368097941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,64,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,float16,0,0.11517866452534993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,fp8,0,0.11545600493748982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,64,128,1,fp8,fp8,0,0.11215466260910034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,fp8,0,0.13640532890955606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,64,0,1,fp8,fp8,0,0.13385066390037537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,float16,0,0.06666133304437001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,float16,0,0.0804799993832906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,fp8,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,64,128,1,fp8,fp8,0,0.06695466736952464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,fp8,0,0.07932800054550171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,fp8,0,0.07712533573309581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,64,0,1,fp8,fp8,0,0.07845333218574524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,float16,0,0.06493333478768666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,float16,0,0.07754666606585185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,fp8,0,0.064560001095136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,64,128,1,fp8,fp8,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,64,0,1,fp8,fp8,0,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,float16,0,0.04041599979003271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,float16,0,0.05022933085759481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,64,128,1,fp8,fp8,0,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,fp8,0,0.049685334165891014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,fp8,0,0.04971200227737427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,64,0,1,fp8,fp8,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,float16,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,fp8,0,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,64,128,1,fp8,fp8,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,64,0,1,fp8,fp8,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,float16,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,float16,0,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,64,128,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,fp8,0,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,64,0,1,fp8,fp8,0,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,float16,0,0.0329120010137558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,fp8,0,0.02905600021282832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,fp8,0,0.026746665438016255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,64,128,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,64,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,64,0,1,fp8,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,float16,0,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,64,128,1,fp8,fp8,0,0.0223786657055219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,64,0,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,64,0,1,fp8,fp8,0,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,64,0,1,fp8,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,fp8,0,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,fp8,0,0.02586666742960612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,64,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,64,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,float16,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,fp8,0,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,64,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,float16,0,0.1151146690050761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,64,0,1,fp8,fp8,0,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,float16,0,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,fp8,0,0.11116266250610352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,64,128,1,fp8,fp8,0,0.10965866843859355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,fp8,0,0.1149120032787323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,64,0,1,fp8,fp8,0,0.11070400476455688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,float16,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,float16,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,fp8,0,0.06356800099213918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,64,128,1,fp8,fp8,0,0.06597866614659627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,fp8,0,0.06601066887378693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,64,0,1,fp8,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,float16,0,0.062037333846092224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,float16,0,0.06253333389759064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,fp8,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,64,128,1,fp8,fp8,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,fp8,0,0.06393066545327504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,64,0,1,fp8,fp8,0,0.06054399907588959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,float16,0,0.041375999649365745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,float16,0,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,64,128,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,fp8,0,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,64,0,1,fp8,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,64,0,1,fp8,fp8,0,0.04084266722202301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,float16,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,64,0,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,64,128,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,64,0,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,64,128,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,float16,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,64,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,64,0,1,fp8,fp8,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,64,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,float16,0,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,64,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,fp8,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,64,0,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,64,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,float16,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,64,128,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,64,0,1,fp8,fp8,0,0.020319999506076176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,float16,0,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,64,0,1,fp8,fp8,0,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,float16,0,0.01969066634774208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,float16,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,fp8,0,0.02056533346573512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,fp8,0,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,64,128,1,fp8,fp8,0,0.057664001981417336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,float16,0,0.05774933099746704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,float16,0,0.05718400080998739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,64,0,1,fp8,fp8,0,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,float16,0,0.03602133442958196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,float16,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,fp8,0,0.037274666130542755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,fp8,0,0.037434667348861694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,64,128,1,fp8,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,64,128,1,fp8,fp8,0,0.036858665446440377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,64,0,1,fp8,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,float16,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,64,0,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,float16,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,64,128,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,64,0,1,fp8,fp8,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,fp8,0,0.024933333198229473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,64,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,64,0,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,fp8,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,64,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,float16,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,64,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,64,128,1,fp8,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,float16,0,0.03876800090074539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,64,0,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,float16,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,64,128,1,fp8,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,64,0,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,64,128,1,fp8,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,64,0,1,fp8,fp8,0,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,float16,0,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,64,128,1,fp8,fp8,0,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,64,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,float16,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,64,128,1,fp8,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,64,0,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,64,0,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,float16,0,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,float16,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,64,128,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,fp8,0,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,64,0,1,fp8,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,float16,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,64,128,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,64,0,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,float16,0,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,64,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,64,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,64,0,1,fp8,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,64,0,1,fp8,fp8,0,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,64,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,float16,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,fp8,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,64,0,1,fp8,fp8,0,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,64,128,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,64,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,64,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,float16,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,64,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,float16,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,float16,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,64,128,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,64,128,1,fp8,fp8,0,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,float16,0,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,64,0,1,fp8,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,64,0,1,fp8,fp8,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,float16,0,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,64,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,float16,0,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,float16,0,0.01381333296497663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,float16,0,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,fp8,0,0.028069332242012024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,64,128,1,fp8,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,64,128,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,64,0,1,fp8,fp8,0,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,64,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,64,0,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,64,128,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,fp8,0,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,64,0,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,64,0,1,fp8,fp8,0,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,64,0,1,fp8,fp8,0,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,float16,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,float16,0,0.04830400149027506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,float16,0,0.3087679942448934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,fp8,0,0.3141546646753947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,float16,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,64,0,1,fp8,fp8,0,0.28917332490285236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,float16,0,0.20569066206614176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,fp8,0,0.03384000062942505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,64,128,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,fp8,0,0.20567999283472696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,64,0,1,fp8,fp8,0,0.19132266441980997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,float16,0,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,float16,0,0.20207999149958292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,64,128,1,fp8,fp8,0,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,fp8,0,0.2015626629193624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,64,0,1,fp8,fp8,0,0.18718934059143066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,float16,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,float16,0,0.20297600825627646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,64,128,1,fp8,fp8,0,0.04212800165017446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,fp8,0,0.20356800158818564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,64,0,1,fp8,fp8,0,0.18897066513697305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,float16,0,0.1586133340994517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,float16,0,0.15546666582425436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,64,128,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,fp8,0,0.1578933298587799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,64,0,1,fp8,fp8,0,0.14645333091417947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,float16,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,fp8,0,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,64,128,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,fp8,0,0.1542026698589325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,fp8,0,0.15240533153216043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,64,0,1,fp8,fp8,0,0.14387200276056925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,float16,0,0.0409706657131513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,float16,0,0.15204266707102457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,64,128,1,fp8,fp8,0,0.03866666555404663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,64,0,1,fp8,fp8,0,0.1397386689980825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,float16,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,fp8,0,0.02815466622511546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,float16,0,0.13405866424242655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,fp8,0,0.13498133420944214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,64,0,1,fp8,fp8,0,0.12546666463216147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,float16,0,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,64,0,1,fp8,fp8,0,0.12336533268292744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,float16,0,0.13121599952379862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,64,128,1,fp8,fp8,0,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,fp8,0,0.13172800342241922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,float16,0,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,float16,0,0.17707733313242593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,fp8,0,0.04826666911443075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,fp8,0,0.031125334401925404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,64,128,1,fp8,fp8,0,0.046495998899141945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,fp8,0,0.17903999487559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,fp8,0,0.11590400338172913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,64,0,1,fp8,fp8,0,0.16581867138544717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,float16,0,0.11531733473141988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,64,128,1,fp8,fp8,0,0.03081600119670232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,64,0,1,fp8,fp8,0,0.1090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,float16,0,0.11135466893513997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,fp8,0,0.11114666859308879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,64,0,1,fp8,fp8,0,0.10378133257230122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,float16,0,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,float16,0,0.10935466488202412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,64,128,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,fp8,0,0.10934399565060933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,64,0,1,fp8,fp8,0,0.10124267141024272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,float16,0,0.11963733037312825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,64,128,1,fp8,fp8,0,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,fp8,0,0.1209279994169871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,64,0,1,fp8,fp8,0,0.11322666207949321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,float16,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,float16,0,0.09052800138791402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,fp8,0,0.09060266613960266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,64,0,1,fp8,fp8,0,0.08654933174451192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,float16,0,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,64,128,1,fp8,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,fp8,0,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,64,0,1,fp8,fp8,0,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,float16,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,float16,0,0.08524266878763835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,64,0,1,fp8,fp8,0,0.07845866680145264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,float16,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,float16,0,0.1090880036354065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,fp8,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,64,128,1,fp8,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,64,128,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,fp8,0,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,64,0,1,fp8,fp8,0,0.10255466898282369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,float16,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,float16,0,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,fp8,0,0.07214400172233582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,64,0,1,fp8,fp8,0,0.06598933537801106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,float16,0,0.06286933521429698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,float16,0,0.06514133512973785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,fp8,0,0.06389866769313812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,fp8,0,0.06619200110435486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,64,0,1,fp8,fp8,0,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,64,0,1,fp8,fp8,0,0.058378666639328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,float16,0,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,fp8,0,0.062133332093556724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,64,0,1,fp8,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,float16,0,0.04345599810282389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,float16,0,0.07874133189519246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,64,128,1,fp8,fp8,0,0.04085866610209147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,fp8,0,0.07881600161393483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,64,0,1,fp8,fp8,0,0.07421333094437917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,float16,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,float16,0,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,64,128,1,fp8,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,64,0,1,fp8,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,float16,0,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,64,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,fp8,0,0.05213333169619242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,64,0,1,fp8,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,float16,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,64,128,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,fp8,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,64,0,1,fp8,fp8,0,0.047637333472569786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,float16,0,0.047594666481018066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,float16,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,64,0,1,fp8,fp8,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,float16,0,0.07461333274841309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,fp8,0,0.04933866858482361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,64,128,1,fp8,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,fp8,0,0.07498666644096375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,64,0,1,fp8,fp8,0,0.0701333334048589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,float16,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,64,128,1,fp8,fp8,0,0.030245333909988403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,64,0,1,fp8,fp8,0,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,float16,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,64,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,64,0,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,float16,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,float16,0,0.04138666639725367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,64,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,64,128,1,fp8,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,fp8,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,64,0,1,fp8,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,float16,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,float16,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,64,128,1,fp8,fp8,0,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,fp8,0,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,64,0,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,float16,0,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,64,128,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,64,128,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,fp8,0,0.05809600154558817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,64,0,1,fp8,fp8,0,0.053344001372655235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,float16,0,0.028778667251269024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,float16,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,64,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,64,128,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,64,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,float16,0,0.034629332522551216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,fp8,0,0.02204799900452296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,float16,0,0.03317866722742716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,fp8,0,0.03230399886767069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,64,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,float16,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,float16,0,0.04043733328580856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,float16,0,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,fp8,0,0.04077333211898804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,64,128,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,fp8,0,0.04971200227737427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,64,0,1,fp8,fp8,0,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,64,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,float16,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,64,128,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,float16,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,float16,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,float16,0,0.030245333909988403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,64,0,1,fp8,fp8,0,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,float16,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,64,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,float16,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,fp8,0,0.041162667175134025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,float16,0,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,float16,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,fp8,0,0.04040000090996424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,64,128,1,fp8,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,64,0,1,fp8,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,64,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,float16,0,0.026719999810059864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,64,128,1,fp8,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,float16,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,64,0,1,fp8,fp8,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,float16,0,0.020794666061798733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,float16,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,64,0,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,fp8,0,0.020469332734743755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,64,128,1,fp8,fp8,0,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,64,128,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,64,128,1,fp8,fp8,0,0.023749334116776783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,64,0,1,fp8,fp8,0,0.0207893339296182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,fp8,0,0.01812800019979477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,float16,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,64,0,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,float16,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,64,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,64,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,64,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,64,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,float16,0,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,64,128,1,fp8,fp8,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,float16,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,64,128,1,fp8,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,64,0,1,fp8,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,64,0,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,64,128,1,fp8,fp8,0,0.013429333766301474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,64,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,64,0,1,fp8,fp8,0,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,float16,0,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,float16,0,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,64,0,1,fp8,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,64,128,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,float16,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,64,128,1,fp8,fp8,0,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,fp8,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,64,128,1,fp8,fp8,0,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,64,0,1,fp8,fp8,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,36.55750274658203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,36.50861358642578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,fp8,0,47.60674031575521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,float16,0,47.7470957438151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,fp8,0,47.168548583984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,float16,0,47.74738566080729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,float16,0,47.730244954427086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,fp8,0,47.131937662760414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,24.47345479329427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,18.3111089070638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,23.94469960530599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,24.137781778971355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,24.604085286458332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,36.73318990071615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,23.95898691813151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,18.339599609375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,12.38543955485026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,24.109817504882812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,12.458810170491537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,18.242773691813152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,24.12940724690755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,9.923903783162435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,24.40093231201172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,12.040772755940756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,12.26638921101888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,9.195103963216146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,12.220347086588541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,9.370672225952148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,6.464373270670573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,11.984293619791666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,9.271557490030924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,12.209519704182943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,6.522261301676433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,12.279562632242838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,5.1212107340494795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,6.281216303507487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,6.276986440022786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,4.868970553080241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,6.302394866943359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,4.919551849365234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,6.19488525390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,6.34391975402832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,4.8930613199869795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,6.341536204020183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,21.87226104736328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,21.304149627685547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,27.44597880045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,28.37303415934245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,28.136271158854168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,28.242469787597656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,27.689921061197918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,14.494842529296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,11.485594431559244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,14.882645924886068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,14.037109375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,13.904117584228516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,21.58142344156901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,28.33807373046875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,10.707237243652344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,14.038079579671225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,10.845685323079428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,13.999888102213541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,14.011466979980469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,7.631354649861653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,13.915701548258463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,10.723392486572266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,5.942650477091472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,7.606522878011067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,7.1246077219645185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,5.520602544148763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,7.129264195760091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,5.597557067871094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,6.985525131225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,7.084373474121094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,7.193962732950847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,3.946634610493978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,3.174015998840332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,3.8515199025472007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,5.56934928894043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,7.26533317565918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,3.5836480458577475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,3.758314768473307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,3.024149258931478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,2.9999945958455405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,3.728032112121582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,3.7320480346679688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,3.664463996887207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,3.73857053120931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,3.069530804951986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,15.374170939127604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,15.304448445638021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,20.443349202473957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,19.98346710205078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,20.007418314615887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,19.999093373616535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,20.17260233561198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,8.445615768432617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,10.636549631754557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,10.63922119140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,10.199167887369791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,10.005125045776367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,15.462037404378256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,19.831104278564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,7.835733413696289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,10.059722900390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,7.732581456502278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,10.219610850016275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,10.011077245076498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,7.825626373291016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,5.4785919189453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,9.958741505940756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,4.243338584899902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,5.595925649007161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,5.023088137308757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,4.034063975016276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,5.2206878662109375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,5.083285331726074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,4.0569868087768555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,5.203920046488444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,2.8455092112223306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,5.171061197916667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,5.305775960286458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,4.050309181213379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,2.9357118606567383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,2.3152693112691245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,2.6054186820983887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,2.4169012705485025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,2.625669320424398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,2.588810602823893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,2.3746506373087564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,2.7575626373291016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,2.6144372622172036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,2.599952061971029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,2.2274667421976724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,21.146804809570312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,20.510042826334637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,float16,0,27.341883341471355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,fp8,0,27.312772115071613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,float16,0,27.41685740152995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,fp8,0,26.973414103190105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,float16,0,27.214324951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,13.49612808227539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,14.422704060872396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,14.236000061035156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,13.619392395019531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,20.497034708658855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,fp8,0,27.491444905598957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,10.192693074544271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,10.455178578694662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,13.397866566975912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,13.449007670084635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,13.538640340169271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,5.693146387736003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,7.128026962280273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,10.569344202677408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,13.525882720947266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,7.333082834879558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,6.637189229329427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,5.220981280008952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,6.501375834147136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,5.246762593587239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,6.582666397094727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,6.995946884155273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,6.71510378519694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,3.7132479349772134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,3.6246773401896157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,2.9896907806396484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,5.263445218404134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,6.617183685302734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,3.4478238423665366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,3.50602658589681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,2.8239307403564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,3.5085280736287436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,3.4615198771158853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,2.803386688232422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,1.9031039873758953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,3.536842664082845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,2.7915948232014975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,3.5365174611409507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,1.634602705637614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,1.9020640055338542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,1.810640017191569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,1.5583573977152507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,1.8026080131530762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,1.588805357615153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,1.878442605336507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,1.798624038696289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,1.8221119244893391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,1.77838929494222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,1.5828746159871419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,12.464234670003256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,float16,0,16.193893432617188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,fp8,0,16.0141118367513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,float16,0,16.447770436604817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,fp8,0,16.469397226969402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,12.375301361083984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,8.787365595499674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,8.820330937703451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,7.003344217936198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,12.512725830078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,float16,0,16.537920633951824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,fp8,0,16.020980834960938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,6.252042770385742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,8.184362411499023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,7.977280298868815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,8.113360087076822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,8.071269353230795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,6.279770533243815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,6.307050704956055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,4.411493301391602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,4.406245231628418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,7.973125457763672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,3.5076586405436196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,7.981669108072917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,4.207386652628581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,3.2862186431884766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,4.138133366902669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,4.122693379720052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,3.3114134470621743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,3.969834645589193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,2.1674399375915527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,1.9285705884297688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,2.2305332819620767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,3.2882614135742188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,4.099418640136719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,4.0425065358479815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,2.0376906394958496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,2.0676639874776206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,1.8347519238789876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,2.012885411580404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,1.8233173688252766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,2.08349339167277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,1.2213119665781658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,2.0366080602010093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,1.15338134765625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,1.763578732808431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,1.1995786825815837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,2.0408533414204917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,1.123242696126302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,1.1448640028635662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,1.0169226328531902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,1.1325866381327312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,1.143514633178711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,1.0197066466013591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,1.1245333353678386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,1.131599982579549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,1.006757338841756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,12.717088063557943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,12.7718874613444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,fp8,0,16.256298065185547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,float16,0,16.938971201578777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,float16,0,16.387989044189453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,fp8,0,16.471253712972004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,8.953045527140299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,9.051322937011719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,13.005242665608725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,float16,0,16.512884775797527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,fp8,0,16.640234629313152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,8.198421478271484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,8.289013544718424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,6.362986882527669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,8.312698364257812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,8.2041015625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,6.4100691477457685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,8.122512181599935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,4.544698715209961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,8.177141189575195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,6.4163468678792315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,4.474623998006185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,3.643967946370443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,4.113375981648763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,4.037930806477864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,3.2696587244669595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,4.062506675720215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,3.9261385599772134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,3.317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,4.021226565043132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,4.092138608296712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,2.167130629221598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,3.427856127421061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,2.1948906580607095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,1.9734986623128254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,2.001274744669596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,2.0019520123799643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,1.7254239718119304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,1.9804372787475586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,1.9889920552571614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,1.762671947479248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,2.012608051300049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,2.0383893648783364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,1.7713066736857097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,1.184223969777425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,1.1660799980163574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,1.0640959739685059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,1.0625759760538738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,1.0713919798533122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,0.9657066663106283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,1.0803306897481282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,1.0662453174591064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,0.9559679826100668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,1.0852533181508381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,0.9526986281077067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,1.087509314219157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,0.6474133332570394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,0.6613813241322836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,0.5998293161392212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,0.6131199995676676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,0.6189173460006714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,0.5618559916814169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,0.6191573143005371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,0.6188586552937826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,0.5604586601257324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,0.6216586828231812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,0.6210506757100424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,0.564026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,8.13051732381185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,float16,0,9.974122365315756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,fp8,0,10.089370727539062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,float16,0,9.962495803833008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,fp8,0,10.178842544555664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,5.725194931030273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,8.178938547770182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,8.229397455851236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,float16,0,10.470912297566732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,fp8,0,10.441375732421875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,4.616527875264485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,5.863946914672852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,5.081472078959147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,4.080149332682292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,4.975061416625977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,5.134026527404785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,5.071237246195476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,4.115872065226237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,4.97268803914388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,5.195306777954102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,2.7515414555867515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,2.82585080464681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,4.146069208780925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,2.4135093688964844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,2.4434293111165366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,2.484837373097738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,2.1452266375223794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,2.4871253967285156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,2.428042729695638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,2.1346453030904136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,2.5318613052368164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,2.5167039235432944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,2.1727360089619956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,1.3993706703186035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,1.4110080401102703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,1.2672106424967449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,1.2988533178965251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,1.2883040110270183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,1.1358986695607503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,1.2788106600443523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,1.1374826431274414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,1.295141299565633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,1.2988853454589844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,1.282960017522176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,1.1350560188293457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,0.7559680143992106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,0.7726346651713053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,0.6902666886647543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,0.6971093018849691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,0.6994880040486654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,0.6297493378321329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,0.707045316696167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,0.7103412946065267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,0.6288106838862101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,0.7038133144378662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,0.7073919773101807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,0.633568008740743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,0.4411093393961589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,0.44544533888498944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,0.41036800543467206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,0.41495998700459796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,0.41497600078582764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,0.38071465492248535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,0.4148053328196208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,0.4171786705652873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,0.3816959857940674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,0.4184693495432536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,0.41857067743937176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,0.3831413189570109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,8.917119979858398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,float16,0,10.596218744913736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,fp8,0,10.648176193237305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,float16,0,10.85601552327474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,fp8,0,10.718276977539062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,8.953098932902018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,9.023344039916992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,float16,0,11.48523203531901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,fp8,0,11.334971110026041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,6.219370524088542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,6.1549866994222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,128,0,1,fp8,fp8,0,5.087216059366862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,5.3266401290893555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,5.293274561564128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,4.456565221150716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,5.399717330932617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,5.313450813293457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,4.471317291259766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,5.402095794677734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,5.362853368123372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,3.082890510559082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,4.491920153299968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,3.0824639002482095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,2.6345866521199546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,2.5990452766418457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,2.267653306325277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,2.6513120333353677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,2.6116746266682944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,2.60808531443278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,2.2793119748433432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,2.678405443827311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,2.312021255493164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,1.5123786926269531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,2.708714803059896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,1.5228160222371419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,1.361610730489095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,1.3455413182576497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,1.3524586359659831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,1.217349370320638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,1.3654932975769043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,1.365557352701823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,1.1903093655904133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,1.35206937789917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,1.193669319152832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,1.3751200040181477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,0.7960426807403564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,0.8073226610819498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,0.7385439872741699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,0.717029333114624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,0.7244373162587484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,0.6685973008473715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,0.7224106788635254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,0.725279966990153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,0.6394720077514648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,0.7283519903818766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,0.7334079742431641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,0.645962675412496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,0.4407840172449748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,0.4071040153503418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,0.4487146536509196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,0.40651198228200275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,0.4075733423233032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.3662986755371094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,0.406607985496521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,0.4100319941838582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,0.36908264954884845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,0.40989331404368085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,0.41174399852752686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,0.3693759838740031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,0.26633065938949585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,0.27191466093063354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.249616007010142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.24381866057713827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.24496533473332724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.24451200167338052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.22813334067662558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.24784000714619955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.2284160057703654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.24685867627461752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.2476960023244222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.23019200563430786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,5.9800160725911455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,float16,0,6.936389287312825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,fp8,0,6.934719721476237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,float16,0,7.076175689697266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,fp8,0,7.1575361887613935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,4.071349461873372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,6.008607864379883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,6.047125498453776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,float16,0,7.0873597462972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,fp8,0,7.034538904825847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,3.4798453648885093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,4.168575922648112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,2.991445223490397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,3.4517014821370444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,3.5211734771728516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,3.5222241083780923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,3.5131041208902993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,3.00489075978597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,3.520106633504232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,3.5374933878580728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,2.0095359484354653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,2.012869358062744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,3.0480321248372397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,1.7896000544230144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,1.7521012624104817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,1.7687946955362956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,1.5588587125142415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,1.7624799410502117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,1.7674773534138997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,1.5374080340067546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,1.7667786280314128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,1.7863893508911133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,1.5536692937215169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,1.0194239616394043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,1.0431733131408691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,0.9315253098805746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,0.9160319964090983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,0.9180053075154623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,0.8061280250549316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,0.9202613035837809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,0.924186627070109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,0.8085920015970866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,0.9260000387827555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,0.5452586809794108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,0.8152426878611246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,0.9308266639709473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,0.5577119986216227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,0.5028853416442871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,0.4932639996210734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,0.4963200092315674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,0.43910932540893555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,0.4967893362045288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,0.4991999864578247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,0.4413013458251953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,0.5006453196207682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,0.5021599928538004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,0.4440319935480754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,0.3096746603647868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,0.31759466727574664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.28111465771993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.2809706727663676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,0.28749332825342816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.28298133611679077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.2561546762784322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.2834666570027669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.2861493428548177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.2861119906107585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.2585066755612691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.2589226762453715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.19204266866048178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.19694934288660684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.17892267306645712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.1742080052693685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.1753013332684835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.1742666761080424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.16030399998029074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.17375467220942178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.1602186659971873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.17449599504470825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.17553067207336426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.1623146633307139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,6.62290636698405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,float16,0,7.405440012613933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,fp8,0,7.3821226755778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,float16,0,7.766885121663411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,fp8,0,7.810895919799805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,4.6235198974609375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,7.232373555501302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,float16,0,7.9174455006917315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,fp8,0,7.903504053751628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,7.295157114664714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,4.6317440668741865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,3.3322718938191733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,3.6858132680257163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,3.6654720306396484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,3.8649705251057944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,3.6204586029052734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,3.855221430460612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,3.8541812896728516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,2.180997371673584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,2.2215894063313804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,3.945717175801595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,3.6115573247273765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,1.8635786374409993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,2.0752053260803223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,1.8665599822998047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,1.683183987935384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,1.8605066935221355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,1.6946934064229329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,1.8641759554545085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,1.8777546882629395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,1.0980853239695232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,1.8749866485595703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,1.0898559888203938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,1.7165333429972331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,1.0392639636993408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,0.933189312616984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,0.9440320332845052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,0.8515199820200602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,0.9412960211435953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,0.938917318979899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,0.8572853406270345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,0.9493172963460287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,0.9476906458536783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,0.5620693365732828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,0.8692320187886556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,0.5502453247706095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,0.5328799883524576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,0.47997331619262695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,0.48310399055480957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,0.4288160006205241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,0.48364798227945965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,0.48679999510447186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,0.43457067012786865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,0.4879680077234904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,0.4889920155207316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,0.43929600715637207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,0.29738134145736694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,0.28972800572713214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,0.2796586751937866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.2548159956932068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.2549920082092285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.22637865940729776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,0.2576213280359904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.25677333275477093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.23118933041890463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,0.2574826677640279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.2342133323351542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.16530666748682657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,0.2585279941558838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.16056533654530844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.15613866845766702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.13806933164596558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.13879467050234476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.1240053375562032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.1397173305352529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.13967999815940857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.1258133351802826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.14109866817792258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.1418186624844869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.12782933314641318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.09423999985059102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.09244799613952637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.08037333190441132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.08135466774304707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.07215466598669688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.08005866905053456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.0721919983625412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.08100800216197968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.08212266862392426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.08091733356316884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,5.660549163818359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,float16,0,6.316352208455403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,fp8,0,6.289557139078776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,float16,0,6.681088129679362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,6.27835210164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,fp8,0,6.689861297607422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,float16,0,6.7667891184488935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,fp8,0,6.772986729939778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,3.954901377360026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,3.8529494603474936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,6.365903854370117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,3.1327733993530273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,3.1378132502237954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,2.8474559783935547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,3.211658795674642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,3.3332961400349936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,3.1303574244181314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,3.2073227564493814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,3.2775678634643555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,3.1662718454996743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,1.9183893203735352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,1.8944320678710938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,1.8208373387654622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,1.5817227363586426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,1.5848479270935059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,1.4355573654174805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,1.5935840606689453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,1.5964372952779133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,1.4850239753723145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,1.6055787404378254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,1.6061546007792156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,1.471786657969157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,0.950154701868693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,0.9335040251413981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,0.9102773666381836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,0.7978879610697428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,0.7239519755045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,0.8000746568044027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,0.8049279848734537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,0.8078347047170004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,0.7310612996419271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,0.8104906876881918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,0.8118133544921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,0.7442826430002848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,0.49053335189819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,0.4767306645711263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,0.46634666124979657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,0.413152019182841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,0.41307199001312256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,0.3652373154958089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,0.41573866208394367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,0.4143893321355184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,0.36950401465098065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,0.4180053472518921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,0.41697601477305096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,0.37724800904591876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,0.25748799244562787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,0.2510346571604411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,0.24497600396474203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.21789334217707315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.21862399578094482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.19264533122380575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.21925866603851318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.21954133113225302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.19678932428359985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.22061866521835327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.22174400091171265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.199455996354421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.1418773333231608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.13944533467292786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.1379093329111735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.11774399876594543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.11827199657758077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.10693333546320598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.11988266309102376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.11960533261299133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.1086293359597524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.12156800429026286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.12170132994651794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.10977600018183391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.07859733204046886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.07830399771531422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.0794239987929662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.0681279997030894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.06834133466084798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.06810666620731354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.06870399912198384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.062037333846092224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.04275733232498169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.0468746672074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.04576533536116282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.040778666734695435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,2.844304084777832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,2.5696694056193032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,2.8471946716308594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,2.936181386311849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,2.852208137512207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,2.992277463277181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,2.9692586263020835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,2.959365208943685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,2.8794453938802085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,1.750127951304118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,1.7071146965026855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,1.7062719662984211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,1.4299200375874836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,1.4328853289286296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,1.296511967976888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,1.4420960744222004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,1.4459840456644695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,1.3438612620035808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,1.45250670115153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,1.453877290089925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,1.330672025680542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,0.8705546855926514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,0.8502186934153239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,0.8430079619089762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,0.7237866719563802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,0.7251573403676351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,0.6530186732610067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,0.7294987042744955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,0.7302560011545817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,0.6593120098114014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,0.7322986920674642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,0.7327199776967367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,0.6728746891021729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,0.4448000192642212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,0.43483734130859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,0.4310346841812134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,0.3717386722564697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,0.372165322303772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.3285013238588969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,0.37426666418711346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,0.37512532869974774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.33373332023620605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,0.3758560021718343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,0.37724268436431885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.33814934889475506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,0.23627734184265137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,0.22952532768249512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,0.22606933116912842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.19690134127934775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.19647467136383057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.1744640072186788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.19842666387557983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.19810134172439575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.1770133376121521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.2004586656888326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.20018666982650757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.181002676486969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.13195199767748514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.1292800009250641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.12729600071907043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.10868799686431885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.10784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.09670933087666829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.10967999696731567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.10905599594116211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.09930133819580078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.11140799522399902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.1111199955145518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.10115733742713928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.07112533350785573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.07025066514809926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.06060799956321716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.06043200194835663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.06061333417892456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.060549333691596985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.05472533404827118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.044682666659355164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,1.5737813313802083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,1.5720106760660808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,1.4386879603068035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,1.5854506492614746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,1.4469280242919922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,1.5870614051818848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,1.5998026529947917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,1.5980587005615234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,1.4775627454121907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,0.939903974533081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,0.918176015218099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,0.9098026752471924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,0.7927947044372559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,0.793071985244751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,0.7211519877115885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,0.7975413004557291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,0.7976586818695068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,0.7269066969553629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,0.8030347029368082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,0.8043946425120035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,0.7393120129903158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,0.47860264778137207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,0.46798932552337646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,0.4649493296941121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,0.40598400433858234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,0.4044479926427205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.3581226666768392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,0.4082506497701009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,0.4079893430074056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.363813320795695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,0.41069865226745605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,0.4105759859085083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.3686773379643758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,0.24928534030914307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,0.2435413400332133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,0.24038400252660116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.21076265970865884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.21171200275421143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.18800532817840576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.2118613322575887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.21285333236058554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.19092265764872232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.21358400583267212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.2123946746190389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.1933493415514628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.13397333025932312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.13224533200263977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.13365333278973898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.11423466602961223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.11389333009719849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.10314666231473286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.11552533507347107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.11493333180745442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.10494400064150493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.11525332927703857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.11516799529393514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.10588266452153523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.07434666653474171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.07620266576608022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.06574399769306183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.06610666712125142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.057999998331069946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.06637333333492279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.06603200236956279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.06491733094056447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.0662773350874583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.036576000352700554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.04011733333269755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03728533287843069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.02903999884923299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.020549333343903225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,1.0219093163808186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,1.0249439875284831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,0.9365493456522623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,1.026245355606079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,1.0264159838358562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,0.9432693322499593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,1.0306453704833984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,1.0303359826405842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,0.9561653137207031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,0.5938773155212402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,0.5823359886805216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,0.5702613194783529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,0.5192266702651978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,0.5192746718724569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.4654239813486735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,0.5218613147735596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,0.5233013232549032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.4703253507614136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,0.5240053335825602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,0.5243786573410034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.4758400122324626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,0.30564266443252563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,0.30084266265233356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,0.293285330136617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.2669706741968791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.2687679926554362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.24077866474787393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.2690719962120056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.2688480019569397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.2446026603380839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.271071990331014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.2698400020599365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.24684266249338785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.16081066926320395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.15844266613324484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.15820266803105673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.14035200079282126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.141184002161026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.12643733620643616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.14180266857147217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.141375998655955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.1301866670449575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.14324266711870828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.14338666200637817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.13191466530164084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.08691733082135518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.08841600020726521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.07834133505821228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.07834666470686595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.06906133393446605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.07825066645940144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.06986133257548015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.07829333345095317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.07081066568692525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.04930133124192556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.04831466575463613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.04286933441956838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.048672000567118325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.04826133449872335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.04851733148097992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.044138665000597634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.029887999097506206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.031082667410373688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.01647466669480006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,0.7522026697794596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,0.7525013287862142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.6933386325836182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,0.7545706431070963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,0.7531680266062418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.697930653889974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,0.7562133471171061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,0.7546026706695557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.7030293146769205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,0.420144001642863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,0.4147626558939616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,0.40821866194407147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.3833226760228475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.38336535294850665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.35497601826985675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.385696013768514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.3843626578648885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.35806934038798016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.38537601629892987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.38570133845011395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.36057066917419434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.2182933290799459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.2152000069618225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.21451199054718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.2001919945081075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.19938133160273233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.18414932489395142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.2007946570714315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.2004586656888326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.1849600076675415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.20121600230534872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.20086399714152017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.18905067443847656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.11552000045776367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.11377066373825073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.11646399895350139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.1050879955291748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.10558933019638062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.09682133793830872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.10585066676139832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.10728533069292705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.0967733363310496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.10598400235176086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.10599467158317566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.09668800234794617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.0639573335647583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.06333333253860474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.05986666679382324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.06140799820423126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.06150933106740316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.058133333921432495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.061994666854540505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.061887999375661217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.03932266682386398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.03867733230193456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.03930133332808813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.03708266715208689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.6126559972763062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.6142986615498861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.5844159921010336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.6137119928995768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.6158026854197184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.5887680053710938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.6153759956359863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.616101344426473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.5908213456471761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.33291200796763104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.33058132727940875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.3291093309720357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.3145973285039266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.3144906759262085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.29818133513132733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.3150879939397176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.3142399986584981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.301258663336436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.316048006216685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.3158666690190633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.3031093279520671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.17294400930404663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.17281067371368408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.17349332571029663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.1629706621170044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.16427733500798544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.15414933363596597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.16435733437538147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.16456000010172525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.1536799967288971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.16457066933314005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.16395200292269388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.154448002576828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.09081600109736125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.0913866659005483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.08854400118192036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.09100266297658284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.0906933347384135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.09098133444786072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.09073600172996521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.0862666666507721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.09078933795293172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.08947733044624329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.08519466718037923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.05372266471385956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.053082664807637535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.05349866549173991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.0537066658337911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.0496373325586319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.051962668697039284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.052501335740089417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.04969066878159841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.03389866650104523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.03491200009981791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.0346666673819224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.023957334458827972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.024031999210516613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.02407466620206833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.022442666192849476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,24.58770243326823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,31.840293884277344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,31.851221720377605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,30.962666829427082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,24.60956319173177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,31.899973551432293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,31.60723114013672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,24.535433451334637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,31.91070302327474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,24.840576171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,31.496729532877605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,31.91656494140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,16.59592056274414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,12.659039815266928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,16.486255645751953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,15.676512400309244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,12.011744181315104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,15.833839416503906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,16.070538838704426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,12.074928283691406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,15.878303527832031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,15.682528177897135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,15.861471811930338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,11.993114471435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,12.334320068359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,16.086549123128254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,15.716261545817057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,8.39517339070638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,8.292677561442057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,6.517381032307942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,8.14346694946289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,6.1469065348307295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,8.210528055826822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,6.243930816650391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,8.096848169962565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,8.044944127400717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,6.128101348876953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,8.044794718424479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,7.980149586995442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,6.208005269368489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,7.9133758544921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,8.28053347269694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,4.3564958572387695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,4.459946632385254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,3.39900811513265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,3.280986785888672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,4.097290674845378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,4.255381266276042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,3.3124799728393555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,4.158986727396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,4.120501200358073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,3.3114452362060547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,4.161898612976074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,4.030277252197266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,4.1085920333862305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,3.3423945109049478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,3.9805386861165366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,14.324698130289713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,18.248240152994793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,19.015552520751953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,18.33025614420573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,14.24127451578776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,18.470272064208984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,18.678517659505207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,18.526507059733074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,14.4510129292806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,14.416645050048828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,18.34997812906901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,18.55146662394206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,9.719653447469076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,7.572405497233073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,9.901423772176107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,9.238117218017578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,7.21783447265625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,9.379119873046875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,9.304666519165039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,7.146826426188151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,9.096885045369467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,9.444426854451498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,7.23695437113444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,9.231301625569662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,7.292890548706055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,9.295477549235025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,9.313098907470703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,5.053584098815918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,3.948330561319987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,5.162298520406087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,4.520288149515788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,3.7127841313680015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,4.809407869974772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,4.816922823588054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,3.7065226236979165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,4.757541338602702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,3.7225119272867837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,4.75278917948405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,4.896416028340657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,4.6048587163289385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,3.718661308288574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,4.7699839274088545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,2.454634666442871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,2.4446400006612143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,2.1820267041524253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,2.024223963419596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,2.3764799435933432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,2.3490079243977866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,2.4087146123250327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,2.3547520637512207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,2.1326187451680503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,2.419439951578776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,2.0254720052083335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,2.3629013697306314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,2.4831093152364097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,2.07152525583903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,2.3494879404703775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,10.126256306966146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,13.18572743733724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,13.610613505045572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,13.171114603678385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,10.322773615519205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,13.066336313883463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,13.209733327229818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,13.486160278320312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,10.114805221557617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,13.153792063395182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,13.336549123128256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,10.393301645914713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,7.171424229939778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,7.4400583902994795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,5.582581202189128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,5.167296091715495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,6.682725270589192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,6.809098561604817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,6.715061187744141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,5.121024131774902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,6.689264297485352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,5.2397918701171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,6.740133285522461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,6.702431996663411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,5.198767979939778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,6.598096211751302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,6.700645446777344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,3.5825812021891275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,3.5946826934814453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,2.866522789001465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,3.2323519388834634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,2.6996161142985025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,3.3784958521525064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,3.2653067906697593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,3.2392638524373374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,2.8897012074788413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,3.235194524129232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,2.7041600545247397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,3.228053410847982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,2.7706454594930015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,3.2550878524780273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,3.3629814783732095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,1.8115839958190918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,1.9418506622314453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,1.6853920618693035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,1.8162933985392253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,1.50217072168986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,1.7297120094299316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,1.7383626302083333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,1.710576057434082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,1.5813439687093098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,1.730511983235677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,1.7417279879252117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,1.5064959526062012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,1.7360533078511555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,1.510335922241211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,1.7250399589538574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,13.626475016276041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,17.27245839436849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,17.531930287679035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,13.73467763264974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,17.73124821980794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,17.865882873535156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,13.811850229899088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,17.906330108642578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,17.95482126871745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,14.0065549214681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,17.802837371826172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,18.122564951578777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,9.438245137532553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,7.340970357259114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,9.635279973347982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,8.827445348103842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,6.853658676147461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,8.821407953898111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,6.909413019816081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,8.776506423950195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,8.919471740722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,6.8755308787028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,8.900400161743164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,8.936010360717773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,6.860160191853841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,8.828954696655273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,9.07327969868978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,4.82588259379069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,3.8265279134114585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,4.927450815836589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,4.280298550923665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,3.5696853001912436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,4.524709383646647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,3.63044802347819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,4.505306561787923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,4.481722513834636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,3.546976089477539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,4.151098569234212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,4.420346577962239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,3.5087254842122397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,4.22437858581543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,4.265695889790853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,2.3247520128885903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,2.0047094027201333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,2.344384034474691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,2.273098627726237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,2.1947360038757324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,1.9660479227701824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,2.1707305908203125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,1.865781307220459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,2.1980907122294107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,2.2031520207722983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,2.1940107345581055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,1.910597324371338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,2.207599957784017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,2.1907359759012857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,1.8906879425048828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,1.2350506782531738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,1.2746293544769287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,1.1082773208618164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,1.1877386569976807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,1.0565439860026042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,1.189728021621704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,1.1856586933135986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,1.1899680296579997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,1.06549866994222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,1.194655974706014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,1.1929279963175456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,1.0586986541748047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,1.195306698481242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,1.1971680323282878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,1.0623413721720378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,8.262608210245768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,10.50590960184733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,10.45845858256022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,10.482400258382162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,8.34554672241211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,10.5131467183431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,8.278191884358725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,11.098597208658854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,10.843109130859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,8.46722157796224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,10.747477213541666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,10.681920369466146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,5.481248219807942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,5.707162857055664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,4.526858647664388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,4.148048082987468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,5.382656097412109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,5.223919868469238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,5.2268320719401045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,4.212629318237305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,5.565216064453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,4.232261339823405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,5.3334401448567705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,5.383888244628906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,4.24725882212321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,5.204965273539226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,5.402624130249023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,2.8609441121419272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,2.87934939066569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,2.533888022104899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,2.573359966278076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,2.1684533754984536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,2.593536059061686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,2.5493812561035156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,2.620800018310547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,2.2906079292297363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,2.1833386421203613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,2.600927988688151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,2.5569334030151367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,2.5314666430155435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,2.5601654052734375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,2.2312374114990234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,1.5143465995788574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,1.5931413968404133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,1.2726293404897053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,1.3599093755086262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,1.1803840001424153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,1.3827733993530273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,1.365023930867513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,1.3377493222554524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,1.2929813067118328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,1.3570559819539387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,1.3540533383687336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,1.1900479793548584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,1.3640106519063313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,1.3500320116678874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,1.1847360134124756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,0.8025279839833578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,0.8297706445058187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,0.7222026983896891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,0.7825547059377035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,0.7681972980499268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,0.6826559702555338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,0.7676373322804769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,0.7600213686625162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,0.6838293075561523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,0.7583839893341064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,0.7589600086212158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,0.6853866577148438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,0.7602933247884115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,0.7603147029876709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,0.6875413258870443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,8.48953628540039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,10.661872227986654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,10.795759836832682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,11.107317606608072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,8.518437067667643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,10.977344512939453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,10.985652923583984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,10.766309102376303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,8.468165079752604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,11.107711791992188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,8.717274983723959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,11.177711486816406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,5.963797251383464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,4.701637268066406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,5.9176375071207685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,5.048080126444499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,4.214101473490397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,5.055727958679199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,5.1449174880981445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,4.220479965209961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,5.3814131418863935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,5.211813290913899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,4.245546658833821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,5.002799987792969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,5.210847854614258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,5.30785592397054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,2.8484373092651367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,4.264970779418945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,2.752533276875814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,2.565114657084147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,2.4708693822224936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,2.514362653096517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,2.194437344868978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,2.501322587331136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,2.4941333134969077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,2.1729599634806314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,2.5165173212687173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,2.5164853731791177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,2.6045920054117837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,2.206922690073649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,2.598730723063151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,1.4427733421325684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,1.4320693016052246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,1.4010613759358723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,1.315013329188029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,1.3975626627604167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,1.1726400057474773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,2.190255959828695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,1.3122399648030598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,1.3130613168080647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,1.1507199605305989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,1.3137493133544922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,1.189130703608195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,1.3090240160624187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,1.3305813471476238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,0.7711466948191324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,1.168602705001831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,0.7855467001597086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,0.6988960107167562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,0.7101439634958903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,0.7162400086720785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,0.644048015276591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,0.7128000259399414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,1.3073919614156086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,0.716106653213501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,0.6409386793772379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,0.7123893102010092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,0.644816001256307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,0.7131893634796143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,0.7189280192057291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,0.6461706558863322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,0.43926934401194256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,0.4456640084584554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,0.41183467706044513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,0.4129653374354045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,0.7142186959584554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,0.41340800126393634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,0.3842720190684001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,0.41232534249623615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,0.41646401087443036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,0.38496001561482746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,0.41449066003163654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,0.4163680076599121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,0.38818665345509845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,0.42041067282358807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,0.4207679828008016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,0.38761067390441895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,5.357744216918945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,6.486517588297526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,6.431333541870117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,6.566815694173177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,5.38809076944987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,6.527952194213867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,6.538842519124349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,6.492277145385742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,5.394869486490886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,6.551071802775065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,6.643007914225261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,5.430432001749675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,3.647141456604004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,3.129253387451172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,3.574021339416504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,3.1074241002400718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,3.2031733194986978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,2.7010507583618164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,3.1570507685343423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,2.729029337565104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,3.1704158782958984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,3.193552017211914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,2.740048090616862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,3.309690793355306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,3.181386629740397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,1.8589919408162434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,2.738650639851888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,3.2843360900878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,1.9351946512858074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,1.7319572766621907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,1.5889387130737305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,1.6439679463704426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,1.4328746795654297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,1.598629315694173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,1.600122610727946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,1.5581493377685547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,1.6007466316223145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,1.6073546409606934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,1.4434506098429363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,1.696015993754069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,1.6540427207946777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,0.9194773038228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,1.4215466181437175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,0.9773279825846354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,0.91976531346639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,0.839237372080485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,0.8425599733988444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,0.773962656656901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,0.8511573473612467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,0.8426506519317627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,0.7531946500142416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,0.8463946978251139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,0.8520906766255697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,0.7582133611043295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,0.850383996963501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,0.8510879675547282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,0.5047626495361328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,0.7621386845906576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,0.5125493208567301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,0.46726401646931964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,0.46670401096343994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,0.4673279921213786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,0.42524266242980957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,0.46750934918721515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,0.46938665707906085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,0.4276426633199056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,0.46983468532562256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,0.4708266655604045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,0.4294666846593221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,0.47257065773010254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,0.4740533431371053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,0.4308319886525472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,0.30061866839726764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.28387733300526935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,0.3062719901402791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,0.2765706578890483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,0.27880533536275226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.2607626716295878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,0.2794666687647502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,0.2771679957707723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.2617759903271993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,0.27854933341344196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.2616426746050517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,0.2820693254470825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,0.28385066986083984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,0.28299200534820557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.2640746633211772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,5.8787415822347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,7.0073496500651045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,7.067871729532878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,7.179248174031575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,5.921770731608073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,7.055365244547526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,7.024341583251953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,6.982554753621419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,5.941141128540039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,7.17086919148763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,5.984575907389323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,7.1630401611328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,3.9204800923665366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,3.3721386591593423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,3.847050666809082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,3.397653261820475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,3.361424128214518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,3.3408374786376953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,3.343045234680176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,2.9324639638264975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,2.948645273844401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,3.3866666158040366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,3.360981305440267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,2.9544267654418945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,3.462458610534668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,1.9049545923868816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,2.9898398717244468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,3.5287787119547525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,1.9510560035705566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,1.789237340291341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,1.6877226829528809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,1.5632905960083008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,1.7115519841512044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,1.6891466776529949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,1.5094772974650066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,1.7031572659810383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,1.6994773546854656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,1.7147679328918457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,1.5217119852701824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,1.7102665901184082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,1.5262293815612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,1.7263733545939128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,0.9853013356526693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,1.0044906934102376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,0.9026186466217041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,0.8791146278381348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,0.877946694691976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,0.8841173648834229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,0.7880799770355225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,0.8794399897257487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,0.7890933354695638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,0.8867306709289551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,0.7960960070292155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,0.8924319744110107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,0.891808032989502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,0.5249386628468832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,0.8975093364715576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,0.8013439973195394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,0.5349706808725992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,0.48603200912475586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,0.47227732340494794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,0.4735200007756551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,0.4293760061264038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,0.4719626506169637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,0.47519465287526447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,0.43080000082651776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,0.475930651028951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,0.47337599595387775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,0.43351999918619794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,0.4782826503117879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,0.30316799879074097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,0.4345333178838094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,0.2967360019683838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,0.47953065236409503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,0.279039998849233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.2657066583633423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,0.2707359989484151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.2507893244425456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.249616007010142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,0.26970134178797406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,0.269269327322642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.27058132489522296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,0.270197331905365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.25308799743652344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,0.2731519937515259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.25483200947443646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.1853760083516439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.1881813406944275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.17479999860127768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.16797866423924765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.16666133205095926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.15640532970428467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.16766933600107828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.167738676071167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.15615999698638916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.1660480002562205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.16773333152135214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.15637333194414774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.1686240037282308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.16843199729919434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.15849066774050394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,3.932933489481608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,4.552986780802409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,4.532261212666829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,4.546821276346843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,0.27324267228444415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,3.9564746220906577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,4.577381451924642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,4.574069341023763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,4.582352002461751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,3.9605652491251626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,2.5710934003194175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,4.655189196268718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,4.001599947611491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,4.6903839111328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,2.6320106188456216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,2.3228000005086265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,2.2225866317749023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,2.2170186042785645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,2.0023999214172363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,2.2275412877400718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,1.9828054110209148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,2.217210610707601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,2.2261759440104165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,2.2433493932088218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,2.0017333030700684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,2.249674638112386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,2.0051466623942056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,1.2941653728485107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,2.255514621734619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,1.1831413110097249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,1.3155253728230794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,1.1273653507232666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,1.1364266872406006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,1.0174026489257812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,1.131994644800822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,1.1419359842936199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,1.0199573040008545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,1.1370986302693684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,1.1464640299479167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,1.0257866382598877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,1.146938641866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,1.1576426823933919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,1.0354613463083904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,0.6762932936350504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,0.6865599950154623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,0.6246453523635864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,0.59279465675354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,0.5984586477279663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,0.5378880103429159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,0.5985920031865438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,0.5955466826756796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,0.5393813451131185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,0.5987093448638916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,0.6021920045216879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,0.5429493188858032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,0.6011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,0.361135999361674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,0.5454453229904175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,0.6050186554590861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,0.37196266651153564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,0.34062933921813965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,0.3237333297729492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,0.3235413432121277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.2967733343442281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,0.32473599910736084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,0.3242826660474141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.2976800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,0.3253920078277588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,0.32787734270095825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.30011733373006183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,0.3272213339805603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,0.3311733404795329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.3018133242925008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,0.21176000436147055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.215503990650177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.19834667444229126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.1862773299217224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.18581332763036093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.17458132902781168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.18648000558217367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.1864853302637736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.1753013332684835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.18709866205851236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.1874986688296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.17767467101415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.19026132424672446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.19050133228302002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.17877866824467978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.1318986713886261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.13502933581670126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.12770133217175803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.12149332960446675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.12090133627255757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.1148426632086436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.1206719974676768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.12136000394821167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.11584533254305522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.12083199620246887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.12127466996510823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.11580800016721089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.1211946705977122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.1234933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.11551466584205627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,4.755530675252278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,4.377936045328776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,4.744298617045085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,4.818383852640788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,4.7445065180460615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,4.471285184224446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,4.987498601277669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,4.9445546468098955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,4.830095926920573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,5.119455973307292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,2.8630666732788086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,2.897146542867025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,2.722906748453776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,5.051936149597168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,4.861978530883789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,2.3740107218424478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,2.3489386240641275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,2.2066613833109536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,2.3506773312886557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,2.333061377207438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,2.2101333936055503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,2.3680426279703775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,2.358463923136393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,2.39520533879598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,2.3784213066101074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,1.4111839930216472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,2.395498593648275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,1.3880319595336914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,1.342192014058431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,2.408736069997152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,1.1770826975504558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,1.1809279918670654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,1.1155893007914226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,1.1831146876017253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,1.181658665339152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,1.121989329655965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,1.1866933504740398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,1.1927733421325684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,1.1322027047475178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,1.2022613684336345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,1.1992159684499104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,1.1427946885426838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,0.7126719951629639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,0.6946506500244141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,0.6007680098215739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,0.6809866428375244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,0.5981599887212118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,0.5687466859817505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,0.6014293432235718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,0.6021226644515991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,0.5709653298060099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,0.6033813158671061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,0.6057173411051432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,0.5747626622517904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,0.6140586535135905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,0.6095413366953532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,0.36901334921518963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,0.35990933577219647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,0.5801493326822916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,0.3530133167902629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,0.31178667147954303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,0.31483733654022217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.290282666683197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,0.31378666559855145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.29049599170684814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,0.31470932563145954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,0.31679999828338623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,0.31512532631556195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.2943893273671468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,0.31702399253845215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,0.31888000170389813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,0.3001493414243062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,0.1978666583697001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.16736000776290894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,0.1926986575126648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.1882773240407308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.16842132806777954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.1544426679611206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.16945600509643555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.16771199305852255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.156549334526062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.16916799545288086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.16986666123072305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.15838399529457092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.17086400588353476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.1720693310101827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.16250666975975037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.11132799585660298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.11373866597811381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.09468799829483032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.11002666751543681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.09494933485984802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.08556266625722249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.09537600477536519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.09539733330408733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.0864533285299937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.09469866752624512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.08682666222254436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.09500799576441447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.09539199868837993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.08885332942008972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.06496533254782359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.06363200147946675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.05506666501363119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.05417599777380625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.05881066620349884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.05850133299827576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.05444799860318502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.059402664502461754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.0539680023988088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,3.909104029337565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,3.7394612630208335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,3.932880083719889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,3.9388532638549805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,3.953690528869629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,3.9011360804239907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,4.164410591125488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,4.216053326924642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,4.206645329793294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,4.254768053690593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,2.4903146425882974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,2.4440746307373047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,4.254037221272786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,4.217167854309082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,2.043488025665283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,2.3995787302652993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,1.9709332784016926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,1.8801706631978352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,1.9825493494669597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,1.9809279441833496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,1.8859626452128093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,1.9903359413146973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,1.9972160657246907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,2.076639970143636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,2.0142614046732583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,2.036458651224772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,2.0944053332010903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,1.2063466707865398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,1.1967413425445557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,0.9989493687947592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,1.18340269724528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,0.9966773192087809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,0.9536320368448893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,1.004688024520874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,1.000981330871582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,0.9586559931437174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,1.007317304611206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,1.0100373427073162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,0.964576005935669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,1.0157866477966309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,1.0193119843800862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,0.9846826394399008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,0.6108373403549194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,0.5961066484451294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,0.5963786840438843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,0.5109493335088094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,0.5075626770655314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,0.48261864980061847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,0.510037342707316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,0.5093546708424886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,0.48739198843638104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,0.5110453367233276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,0.512234648068746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,0.49127999941507977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,0.5171146790186564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,0.5196906725565592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,0.31547733147939044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,0.4952746629714966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,0.31067200501759845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,0.31007466713587445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.2649066646893819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.264682670434316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.24765866994857788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.26545600096384686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.26506133874257404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.24759999910990396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,0.26706133286158246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.26714666684468585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.2510026693344116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,0.2690773407618205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,0.2707200050354004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.25785066684087116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,0.1704053282737732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.16526933511098227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.16430399815241495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.1421066621939341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.14334932963053384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.1318719983100891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.1422719955444336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.14173866311709085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.13453333576520285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.14415466785430908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.14341333508491516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.1348960002263387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.14549332857131958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.14616533120473227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.13848533233006796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.09527466694513957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.096778670946757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.09530666470527649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.0803306649128596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.07859733204046886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.07251200079917908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.07845333218574524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.07821333408355713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.07186666627724965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.0793333351612091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.08005866905053456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.07268266876538594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.07859733204046886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.08003733555475871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.07525866727034251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.053082664807637535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.055717334151268005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.04960533479849497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.045514668027559914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.04713066418965658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.04920533299446106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.03264000018437704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.03081600119670232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,1.7761386235555012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,1.7713707288106282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,1.779904047648112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,1.7052747408548992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,1.7079359690348308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,1.788335959116618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,1.7890879313151042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,1.7863680521647136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,1.909056027730306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,1.8109173774719238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,1.8158613840738933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,1.1036852995554607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,1.9135732650756836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,1.0769013563791912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,1.0976266860961914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,0.893887996673584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,0.8618346850077311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,0.8995733261108398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,0.8959039847056071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,0.899738629659017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,0.8641813596089681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,0.903434673945109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,0.9040266672770182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,0.8714773654937744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,0.911952018737793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,0.9110399881998698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,0.9094399611155192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,0.45708799362182617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,0.5425706704457601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,0.5573226610819498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,0.5539253155390421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,0.4608000119527181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,0.43695998191833496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,0.45797332127888996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,0.43891199429829914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,0.4599573214848836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,0.4617973168690999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,0.4437119960784912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,0.46725332736968994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,0.46860265731811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,0.2905600070953369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,0.28300267457962036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,0.4481866757074992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,0.2874506711959839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.2379146615664164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.2232159972190857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.2380160093307495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.23703465859095255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.23815999428431192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.22263999780019125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.23898667097091675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.23835732539494833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.2408213416735331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.2267893354098002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.24148799975713095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.231930673122406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,0.1532586713631948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.15339199701944986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.15064533551534018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.12569066882133484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.12569066882133484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.12532266974449158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.11910399794578552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.1183786690235138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.1256586710611979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.1267466644446055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.12595733006795248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.12020267049471538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.1295199990272522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.1285706659158071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.1241333285967509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.0886240005493164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.08910933136940002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.07246399919191997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.07186133166154225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.07313600182533264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.0664106657107671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.07111999889214833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.07241599758466084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,0.45817601680755615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.07205333312352498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.06832533578077953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.04619733492533366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.04353600243727366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.04339733223120371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.04310933252175649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.04372799893220266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.0466186652580897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.04316799839337667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.04065600037574768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.029077333708604176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.029098667204380035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.029930666089057922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.028773332635561626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.028853334486484528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,0.9916533629099528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,0.9877119859059652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,0.9429492950439453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,0.9929493268330892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,0.9959572950998942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,0.9469652970631918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,1.0018133322397869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,0.9973013401031494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,0.9720053672790527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,1.0134666760762532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,1.0116426944732666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,0.9977173010508219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,0.6004319985707601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,0.5848160187403361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,0.5899519920349121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,0.5032373269399008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,0.5037866830825806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,0.4757759968439738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,0.5031840006510416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,0.5042399962743124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,0.4771519899368286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,0.5058346589406332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,0.5075893402099609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,0.481386661529541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,0.5128053426742554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,0.5117119948069254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,0.3086293339729309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,0.48908265431722003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,0.30317866802215576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,0.30502400795618695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.26022400458653766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.2608960072199504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.25997332731882733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.2401813268661499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.26132800181706745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.2625280022621155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.24200532833735147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.26239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.24480533599853516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.2648426691691081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.26501866181691486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,0.16359466314315796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.2502506573994954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.15891200304031372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.1264906624952952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.16004266341527304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.13715199629465738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.13794133067131042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.13794133067131042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.1366986632347107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.1276800036430359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.1381013294061025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.13828266660372415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.1297546625137329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.1397599975268046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.1399679978688558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.13397866487503052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.09016533692677815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.08846933643023173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.09160000085830688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.07453333338101704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.07388799885908763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.06717333197593689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.07595199843247731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.06836799780527751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.07413866619269054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.06814933319886525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.0747680018345515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.07632000247637431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.07180800040562947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.04962133367856344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.05159999926884969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.04645333190759023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.046767999728520714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04302933315436045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.02906133234500885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.031130666534105938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.029045333464940388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.02855466554562251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.028725333511829376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.020645332833131153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,0.6616319815317789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,0.6615306536356608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,0.6262186765670776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,0.662282665570577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,0.6639413436253866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,0.6289920012156168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,0.6671626567840576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,0.6662773291269938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,0.6310666799545288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,0.6722933451334635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,0.6714239915211996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,0.38818665345509845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,0.6392800013224283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,0.380570650100708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,0.37776533762613934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.3392213185628255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.3407626549402873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.313920001188914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.3410293261210124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.34087467193603516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.3161440094312032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.34140264987945557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.3408000071843465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.3205813368161519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.3444426854451497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.344213326772054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,0.20248534282048544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.3247999946276347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.19834667444229126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.19737066825230917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.1777226726214091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.1775839924812317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.164383997519811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.17690666516621908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.1776853402455648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.1790026624997457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.17800533771514893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.16537066300710043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.1669493317604065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.1792746583620707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.10973866780598958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.18101332585016885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.17010132471720377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.10903466741243999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.10962667067845662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.09514133135477702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.09709333380063374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.08574933807055156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.09526933232943217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.09711999694506328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.09570133686065674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.08685866991678874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.08715200424194336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.09642133116722107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.09617066383361816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.08923733234405518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.05950933198134104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.05528533458709717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.054341331124305725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.05073600014050802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.05455466608206431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.050848002235094704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.05434666574001312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.049866666396458946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.03716800113519033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.03526933242877325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.034389334420363106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.03316266586383184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.034341332813103996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.02388266722361247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.5020586649576823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.5028053522109985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.4697920083999634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.5022293329238892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.5033919811248779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.47149864832560223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.5033546686172485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.5038133462270101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.4737653334935506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.5057173172632853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.5056533416112264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,0.2835413416226705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.47775999704996747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.2587626576423645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.280074675877889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.27401600281397503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.25838400920232135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.24146666129430136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.258735994497935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.24217599630355835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.2432159980138143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.25963733593622845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.2590346733729045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.26096532742182416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.261680006980896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.14969066778818765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.24664000670115152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.14989333351453146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.14691733320554098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.13666666547457376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.13620266318321228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.1252906620502472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.13598400354385376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.2588319977124532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.1360319952170054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.12360533078511556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.13705066839853922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.1377120018005371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.1260693371295929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.13714133699735007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.13878933588663736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.12707199652989706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.07858666777610779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.07946133116881053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.07589333256085713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.07046933472156525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.07016533116499583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.07547733187675476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.07620800038178761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.07658133407433827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.07521066566308339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.07012266914049785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.04520533482233683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.0454773356517156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.04253333310286204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.04448533554871877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.04420266548792521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.042170668641726174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.030591999491055805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.03035199890534083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.020799999435742695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.019738666713237762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.414741317431132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.41517333189646405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.3946400086085002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.41540801525115967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.41621867815653485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.39631466070810956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.4145546754201253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.41572264830271405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.3980693419774373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.41813333829243976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.22666666905085245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.4167519807815552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.2275573412577311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.40010666847229004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.2258239984512329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.21445866425832114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.2140373388926188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.20281066497166952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.21508800983428955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.21432000398635864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.2025279998779297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.21522667010625204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.21574934323628744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.2025279998779297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.21619200706481934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.21580266952514648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.11958932876586914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.20558400948842367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.11793067057927449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.11787199974060059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.1156213382879893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.11542399724324544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.1156213382879893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.10776000221570332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.11587199568748474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.10734400153160095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.11422933141390483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.11589333415031433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.10889066259066264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.1146506667137146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.11546666423479716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.1093280017375946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.06612266600131989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.06477866570154826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.06611200173695882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.06449600060780843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.06534933547178905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.0625600020090739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.026874666412671406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.026842666169007618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.02032533288002014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,18.16037368774414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,18.463130950927734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,24.330174763997395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,24.432693481445312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,24.269493103027344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,24.285077412923176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,24.279525756835938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,9.51148796081543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,12.437994639078775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,12.138922373453775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,11.810943603515625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,11.706495920817057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,18.510501861572266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,23.764427185058594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,9.314202626546225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,9.344575881958008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,11.774394989013672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,11.992554982503256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,12.174084981282553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,12.119146982828775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,6.315290451049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,9.225162506103516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,6.301263809204102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,4.845370610555013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,6.098794937133789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,4.635823885599772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,6.2609068552653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,6.05126953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,4.637728055318196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,6.222527821858724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,3.1067892710367837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,5.993925094604492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,4.633434613545735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,6.098501205444336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,3.1962238947550454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,2.5889760653177896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,3.040255864461263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,3.1102240880330405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,2.5410186449686685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,3.010885238647461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,2.5391786893208823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,3.119669278462728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,3.035445213317871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,3.0083627700805664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,2.818197250366211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,10.71292241414388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,10.71404774983724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,13.905829111735025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,13.980944315592447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,13.712170918782553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,13.964555104573568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,14.064027150472006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,5.734272003173828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,7.495690663655599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,7.516437530517578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,6.957306543986003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,7.049381256103516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,11.040191650390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,14.098463694254557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,5.348890940348308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,5.40885861714681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,6.727178573608398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,7.013274510701497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,3.62662410736084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,7.231850941975911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,7.322442372639974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,5.421525319417317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,3.711887995402018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,3.0399999618530273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,3.3872801462809243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,2.856656074523926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,3.4431680043538413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,3.5279626846313477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,2.8571252822875977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,3.4744478861490884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,3.5218187967936196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,1.8761013348897297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,3.5141493479410806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,1.925045331319173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,1.6095946629842122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,2.826933224995931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,1.8388746579488118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,1.8336745897928874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,1.760287920633952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,1.5554186503092449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,1.8501653671264648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,1.8029759724934895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,1.8423786163330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,1.8183040618896484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,1.5912639300028484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,7.747605641682942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,7.722127914428711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,9.799376169840494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,9.809029261271158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,10.150271733601889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,9.857653299967447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,10.05397860209147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,5.187024116516113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,4.243376096089681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,5.549557367960612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,5.22815481821696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,5.080554644266765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,7.9117170969645185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,10.145978927612305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,3.9097814559936523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,5.011685371398926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,4.014527956644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,5.190592130025228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,5.103061358133952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,5.259615898132324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,2.666538715362549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,3.9364852905273438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,2.1677120526631675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,2.6832799911499023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,2.502112070719401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,2.488330682118734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,2.1088107426961265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,2.5217493375142417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,2.54856538772583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,2.073296070098877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,2.4685813585917153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,1.36025603612264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,2.485658645629883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,2.107978661855062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,1.4045066833496094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,1.2070986429850261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,1.3494933446248372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,1.2602880001068115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,1.317248026529948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,1.315567970275879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,1.1627039909362793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,1.377402623494466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,1.3306079705556233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,1.3421707153320312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,1.175978660583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,10.434346516927084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,10.203247706095377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,13.412816365559896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,13.254085540771484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,13.556891123453775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,13.473594665527344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,12.948538462320963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,7.187114715576172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,7.344117482503255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,5.672890981038411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,6.756794611612956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,10.567824045817057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,6.672618865966797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,13.752591451009115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,5.130064010620117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,5.224405288696289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,6.8336639404296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,6.7058455149332685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,6.838629404703776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,3.555232048034668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,5.244906743367513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,3.644240061442057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,2.9156853357950845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,6.691263834635417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,3.251615842183431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,3.171840031941732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,2.807450612386068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,3.269802729288737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,2.6732587814331055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,3.300922711690267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,3.2201865514119468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,3.3902133305867515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,1.7273813883463542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,1.5134612719217937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,1.7911307017008464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,2.7962773640950522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,1.814890702565511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,1.697205384572347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,1.682576020558675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,1.7188480695088704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,1.4802080790201824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,1.6883947054545085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,1.6892053286234539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,0.9580639998118082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,1.4573920567830403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,0.9715893268585205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,1.6793120702107747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,0.8629279931386312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,0.9436533451080322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,0.9673919677734375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,0.8184320131937662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,0.9176479975382487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,0.9220960140228271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,0.8831626574198405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,0.9294133186340332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,0.9453866481781006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,0.8418560028076172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,6.254426956176758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,6.344591776529948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,7.99395751953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,7.92307726542155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,8.063237508138021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,7.907328287760417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,7.968725204467773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,3.4777441024780273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,4.39468256632487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,4.181056022644043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,3.8530505498250327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,3.8878186543782554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,6.383888244628906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,8.094565073649088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,3.1713600158691406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,3.221914609273275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,4.028448104858398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,4.141829490661621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,4.116901397705078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,3.8884480794270835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,2.0399039586385093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,3.248069445292155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,2.13808536529541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,1.8347093264261882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,1.994117259979248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,2.071898619333903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,1.799359957377116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,1.9772639274597168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,1.9237386385599773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,1.7778186798095703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,1.9365119934082031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,1.9772426287333171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,1.1429333686828613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,1.732869307200114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,0.976149320602417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,1.1175466378529866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,1.0429386297861736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,1.0559039910634358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,0.9382719993591309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,1.0449546972910564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,1.0447466373443604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,0.9214133421579996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,1.0439733664194744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,1.0494293371836345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,0.6155786514282227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,0.9242453575134277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,0.6230186621348063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,0.5629280010859171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,0.6163626511891683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,0.590885321299235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,0.5332586765289307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,0.5905599991480509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,0.5935253302256266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,0.535151998202006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,0.5936160087585449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,0.5947306553522745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,0.5381600062052408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,6.447706858317058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,6.524239857991536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,8.145941416422525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,8.132080078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,8.001440048217773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,8.052703857421875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,8.171557108561197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,3.600890795389811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,4.376426696777344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,4.4235413869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,3.792170524597168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,3.811621348063151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,6.529050827026367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,8.23577626546224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,3.2215731938680015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,3.2710399627685547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,4.071152051289876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,4.005082766215007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,4.097615877787272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,4.039679845174153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,2.1383466720581055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,3.3458614349365234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,2.119472026824951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,1.9980907440185547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,1.9536213874816895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,1.9566720326741536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,1.7906346321105957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,1.9821866353352864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,1.934175968170166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,1.7082932790120442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,1.9403947194417317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,2.010416030883789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,1.0944106578826904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,1.1084213256835938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,1.0003306865692139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,1.7191093762715657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,1.0063040256500244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,1.0157333215077717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,0.9014240105946859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,1.0196159680684407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,1.0208640098571777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,0.8972053527832031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,1.0149813493092854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,1.0190827051798503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,0.5977866649627686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,0.9026079972585043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,0.6011253197987875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,0.553605318069458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,0.5062026580174764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,0.5515573422114054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,0.5543839931488037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,0.5600106716156006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,0.5581920146942139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,0.5012746651967367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,0.5584319829940796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,0.5643359820048014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,0.5042186578114828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,0.3246346712112427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,0.34727998574574787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,0.3531413475672404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,0.32436267534891766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,0.3243359923362732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.30318933725357056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,0.32547734181086224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,0.328384002049764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.3048959970474243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,0.3301333387692769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,0.331989328066508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.30577067534128827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,4.125343958536784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,5.010101318359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,4.991951942443848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,4.137765248616536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,5.035973230997722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,5.068101247151692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,2.6286986668904624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,2.4198506673177085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,2.6815414428710938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,2.425978660583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,4.197135925292969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,5.151781400044759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,5.006938616434733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,2.0770506858825684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,2.415887991587321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,2.512938658396403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,2.0913599332173667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,2.409503936767578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,1.4017972946166992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,2.482367992401123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,2.4747519493103027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,1.3847893079121907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,2.109567960103353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,1.2079253196716309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,1.2407999833424885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,1.254746675491333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,1.1359253724416096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,1.2396213213602703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,1.2419573465983074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,1.088645299275716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,1.2420639991760254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,0.7211253643035889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,1.2592213153839111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,0.6861066818237305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,0.7251733144124349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,1.1255466938018799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,0.6553119818369547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,0.6533546845118204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,0.5897279977798462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,0.6733120282491049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,0.67412797609965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,0.6632586717605591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,0.5884053309758505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,0.39562666416168213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,0.669157346089681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,0.40726399421691895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,0.5921973387400309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,0.36533868312835693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,0.3660000165303548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,0.36397333939870197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.3344159921010335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,0.3680320183436076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,0.3691466649373372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,0.3365653355916341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,0.2387733260790507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,0.24240533510843912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,0.3723413149515788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,0.3740533192952474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,0.33902398745218915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.22479466597239176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.222053329149882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.2214613358179728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.20530666907628378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.22304532925287882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.22248532374699911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.2076693375905355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.22195732593536377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.22395733992258707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.21178666750590006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,4.537365277608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,5.407983779907227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,5.369146982828776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,4.547338803609212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,5.397226969401042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,5.326325416564941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,5.51744016011556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,2.9873387018839517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,3.0148213704427085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,2.623290697733561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,2.5977120399475098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,2.6395947138468423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,4.623205184936523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,5.479616165161133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,2.3220213254292807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,2.2809972763061523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,2.610666592915853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,2.6154133478800454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,2.697349230448405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,2.6745599110921225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,2.3203306198120117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,1.4683732986450195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,1.4976959228515625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,1.3319786389668782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,1.335584004720052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,1.3511253992716472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,1.1996746857961018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,1.32315198580424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,1.344912052154541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,1.1728906631469727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,1.3405332565307617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,1.1913706461588542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,1.3537386258443196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,0.7726826667785645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,0.7814613183339437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,0.6971039772033691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,0.6884960333506266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,0.6920639673868815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,0.6200746695200602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,0.694271961847941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,0.7067413330078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,0.619050661722819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,0.6951039632161459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,0.7052000363667806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,0.6242666641871134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,0.4103519916534424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,0.42049598693847656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,0.3786826531092326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,0.3741813500722249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,0.3730986515680949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,0.33831465244293213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,0.37889599800109863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,0.37379733721415204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,0.3407680193583171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,0.3794879913330078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,0.3823733329772949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.21944000323613486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,0.3429439862569173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,0.23802133401234946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,0.2394826610883077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.213754673798879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.212442676226298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.1978399952252706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.21307732661565146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.2145973245302836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.20101332664489746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.15037866433461508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.21667200326919556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.21829867362976074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.1367039978504181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.20158400138219199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.14855466286341348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.1397333343823751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.13710932930310568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.12782399853070578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.135754664738973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.13796266913414001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.12964800000190735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.1363200048605601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.13871467113494873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.1276853382587433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,3.0394506454467773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,3.4964319864908853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,3.465354601542155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,3.0666348139444985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,3.5286451975504556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,3.474442799886068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,3.5811306635538735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,1.9614027341206868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,1.7704906463623047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,1.985871950785319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,1.749776045481364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,1.7382720311482747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,3.1129226684570312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,3.562101364135742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,1.5447840690612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,1.7472532590230305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,1.5403520266215007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,1.755674680074056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,1.756608009338379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,1.7777120272318523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,1.569061279296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,0.9999679724375407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,0.9079039891560873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,1.0314186414082844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,0.8886773586273193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,0.8942240079243978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,0.7888586521148682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,0.8921226660410563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,0.9006880124409994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,0.7941760222117106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,0.5246773163477579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,0.9095679918924967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,0.9048159917195638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,0.806874672571818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,0.4806613524754842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,0.5324480136235555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,0.4691093365351359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,0.47115198771158856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,0.4726080099741618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,0.4209706783294678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,0.47674131393432617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,0.47594666481018066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,0.42400534947713214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,0.480565349260966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,0.28651734193166095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,0.42884798844655353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,0.29370667537053424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.2579200069109599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.259226659933726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.23563732703526816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.25922133525212604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.2595306634902954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.2362933357556661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,0.26426132520039874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,0.2637386719385783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.23974400758743286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.1695093313852946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.17283733685811362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.15777599811553955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.14907733599344888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.14869866768519083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.1360160013039907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.15080533425013223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.1504693329334259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.13798399766286215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.15255467096964517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.15214932958285013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.14203199744224548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.10123200217882793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.10586133599281311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.09461866815884908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.10757333040237427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.1025333305199941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.10053333640098572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.10082133611043294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.09495466947555542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.10077333450317383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.10136000315348308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.09506666660308838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,3.852346738179525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,3.853775978088379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,3.7269280751546225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,3.8474133809407554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,3.8579254150390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,3.77510929107666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,4.003146807352702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,2.260629336039225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,2.1896373430887857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,1.8539573351542156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,2.1090879440307617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,3.984389305114746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,3.7789281209309897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,1.8978293736775715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,1.8397919336954753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,1.8825173377990723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,1.8734985987345378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,1.8357866605122883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,1.9078559875488281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,1.085536003112793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,1.9210453033447266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,1.0834240118662517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,0.9426613648732504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,1.8603679339090984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,1.041109323501587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,0.9457386334737142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,0.8698879877726237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,0.9450133641560873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,0.9451786677042643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,0.8736266295115153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,0.9661920070648193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,0.9601546923319498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,0.5561546484629313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,0.5456053415934244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,0.9098506768544515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,0.5329493284225464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,0.4831093152364095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,0.482746680577596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,0.4452213446299235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,0.48369598388671875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,0.48419201374053955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,0.4935946861902873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,0.49531733989715576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,0.4509013493855794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,0.29073599974314374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,0.465178648630778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,0.2852320075035095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,0.2794293363889058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,0.2541866699854533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.2550666729609172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.2346186637878418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.2353760004043579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,0.2550346652666728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.24447466929753622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,0.2575146754582723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.15751999616622925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,0.26201067368189496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,0.260373334089915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.1595200002193451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.15198933084805807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.13763200243314108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.1386666695276896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.12614400188128153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.13942399621009827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.14008000493049622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.12813867131868997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.14199466506640115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.14170666535695395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.08210666477680206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.13358933726946512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.09438932935396831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.09310932954152425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.0923466682434082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.08126399914423625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.0728000005086263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.0805866668621699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.08164266745249431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.08284799754619598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.08203733464082082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.07422933479150136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.05606933434804281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.05482666691144308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.054005334774653115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.04828799764315287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.053946668903032936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.04994666576385498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,3.294528007507324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,3.322533289591471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,3.250298817952474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,3.306015968322754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,3.323733329772949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,3.2675253550211587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,1.9122986793518066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,1.885306676228841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,1.5999946594238281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,1.8414719899495442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,3.3737494150797525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,3.4038238525390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,3.2918294270833335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,1.6076107025146484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,1.5820426940917969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,1.5980213483174641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,1.609066645304362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,1.6092693010965984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,1.6518452962239583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,1.6659305890401204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,0.9422826766967773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,0.9321226278940836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,0.914031982421875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,1.6352853775024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,0.8068640232086182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,0.8121279875437418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,0.7456639607747396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,0.808842658996582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,0.8106346925099691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,0.7497440179189047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,0.825920025507609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,0.483024001121521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,0.7838613192240397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,0.8269813060760498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,0.47179198265075684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,0.4667359987894694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,0.4129013220469157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,0.4132479826609294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,0.3820106585820516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,0.3853013515472412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,0.41580267747243244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,0.41380266348520917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,0.42471468448638916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,0.42484267552693683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,0.24560532967249551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,0.399125337600708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,0.2537226676940918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,0.2483146588007609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.21783999601999918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.2189813256263733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.20099733273188272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.21958933273951212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.21966934204101562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.20113599300384521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.22395733992258707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.22409067551294962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.21079999208450317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.13832533359527588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.13593600193659464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.1349546710650126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.11764267086982727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.11867733796437581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.1216266651948293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.1086293359597524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.11732799808184306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.1181813379128774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.10935999949773152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.12164800365765889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.11452800035476685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.07990399996439616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.07710400223731995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.07960000137488048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.06884799897670746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.06159466505050659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.06833066542943318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.0682239979505539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.06881600121657054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.06877333422501881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.045935998360315956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.04543466866016388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.030591999491055805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.030805334448814392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.0296426663796107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,1.4453546206156414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,1.4638932545979817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,1.4249760309855144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,1.455125331878662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,1.4650346438090007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,1.4883467356363933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,1.4923094113667805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,0.8597706953684489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,0.8372639815012614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,1.4907840092976887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,0.8472692966461182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,1.5007360776265461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,0.7290933132171631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,0.7311840057373047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,0.683135986328125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,0.7313120365142822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,0.7339359919230143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,0.679317315419515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,0.7511093616485596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,0.7504533131917318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,0.4381066560745239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,0.4301066795984904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,0.7100266615549723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,0.4314773480097453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,0.37458133697509766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,0.3744906584421794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,0.37561599413553876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,0.3445813258488973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,0.37380266189575195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,0.34699734052022296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,0.38441598415374756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,0.38388268152872723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,0.22680532932281494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,0.23195733626683554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,0.36318933963775635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,0.22828799486160278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.19917333126068115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.19790933529535928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.1973066727320353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.18234666188557944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.19740267594655356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.1828213334083557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.20422399044036865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.12460800011952718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.20322134097417197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.19089599450429282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.12843199570973715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.12572266658147177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.10790399710337321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.0990133285522461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.10912533601125081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.10479467113812764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.10860266288121541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.10075199604034424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.11136533816655476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.07117333511511485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.07310933371384938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.06113600234190623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.055120001236597695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.060789331793785095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.05573866764704386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.06176533301671346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.061861331264177956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.05641599992911021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.042037333051363625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.038762666285037994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.02815466622511546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.024522667129834492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.024346667031447094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,0.8019786675771078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,0.7986506621042887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,0.7390186786651611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,0.8055093288421631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,0.8050933678944906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,0.7611467043558756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,0.4728586673736572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,0.8246400356292725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,0.8209066390991211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,0.7732906341552734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,0.4636960029602051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,0.467904011408488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,0.40755732854207355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.3782399892807007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,0.4078133503595988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,0.40961599349975586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,0.4090026617050171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.379253347714742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,0.41996800899505615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,0.4182506799697876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,0.39320532480875653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,0.24530667066574097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,0.24038932720820108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,0.24050132433573404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.21278399229049683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.21081066131591797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.1966186761856079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.21243733167648315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.19483733177185059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.21184533834457397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.21760000785191855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.21580266952514648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.11313600341478984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.20465600490570068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.13214932878812155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.12869866689046225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.13085333506266275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.11321066816647847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.10429333647092183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.11331199606259663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.11417599519093831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.10482133428255717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.07434133191903432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.11675199866294861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.11726933717727661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.11000532905260722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.07495466868082683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.07627733548482259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.06504000226656596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.06724800169467926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.06648533542950948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.05904000004132589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.0661653329928716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.06592000027497609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.06613333523273468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.060933331648508705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.04301333427429199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.04014399896065394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.04027733455101649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.028575999041398365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.028965334097544353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,0.5219466686248779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,0.5196799834569296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.48742401599884033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,0.5228000084559122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,0.5210346778233846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.48711466789245605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,0.5329333146413168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,0.3028320074081421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,0.530837337176005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,0.29711467027664185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,0.4994613329569499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.2489173412322998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,0.29334400097529095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.268832008043925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.2675413290659587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.2690560022989909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.2703839937845866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.15852800011634827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.2508373260498047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.2731893261273702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.15636266271273294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.27354133129119873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.25731732447942096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.15616533160209656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.14196266730626425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.14173866311709085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.1291253368059794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.1420799990495046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.1418453355630239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.13312000036239624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.14523733655611673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.14408000310262045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.1365066667397817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.08853333195050557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.08824533224105835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.08845866719881694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.07941333452860515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.07931733131408691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.07089599967002869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.07940799991289775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.049600000182787575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.07077866792678833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.08046400050322215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.07962666451931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.04882133503754934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.04823466638724009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.04866133133570353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.0310506671667099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.029066666960716248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.028442665934562683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.38738131523132324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.3872160116831462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.362831989924113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.3871573209762573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.3864533503850301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.3628693421681722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.3909279902776082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.392192006111145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.37145598729451496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.21591466665267944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.21545066436131796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.21150400241216025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.19987199703852335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.20055466890335083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.18782933553059897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.20097066958745322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.20216000080108643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.2007946570714315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.18922666708628336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.20347734292348227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.19325866301854452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.11622400085131328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.11482133467992146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.11685867110888164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.10801066954930623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.10896533727645874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.09876267115275066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.10844266414642334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.09915199875831604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.1090133289496104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.06333866715431213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.10846400260925293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.10046399633089702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.06414400041103363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.061792001128196716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.056847999493281044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.062037333846092224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.056645333766937256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.06228800117969513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.06221333146095276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.062090665102005005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.057775999108950295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.03825066735347112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.03836799909671148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.03845866769552231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.03858133405447006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.03832533210515976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.31588266293207806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.3149600028991699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.30187733968098956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.3174346685409546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.31620800495147705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.3052000006039937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.3190079927444458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.3174346685409546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.17377599080403647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.3081173300743103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.17322667439778647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.17417599757512411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.1667520006497701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.1664426624774933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.15611732999483743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.16542399923006693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.16429332892100015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.1554080049196879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.16554133097330728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.08949866890907288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.16659733653068542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.15804266929626465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.09267733494440715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.09304533402125041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.09035733342170715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.08559466401735942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.09117866555849712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.09131733576456706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.0848479966322581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.09130133191744487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.09083732962608337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.08545600374539693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.051776001850763954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.053504000107447304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.05026666820049286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.053039997816085815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.0499839981396993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.05159999926884969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.021962667504946392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.020053333292404812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,15.024064381917318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,14.96118418375651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,19.5862299601237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,19.778767903645832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,19.290047963460285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,19.777088165283203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,20.27941385904948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,8.044298807779947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,10.273663838704428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,10.241546630859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,9.940229415893555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,9.888186772664389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,15.322869618733725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,19.697301228841145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,7.536874771118164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,7.5892588297526045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,10.004938761393229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,10.010677337646484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,9.94706662495931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,9.8908322652181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,5.2878774007161455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,7.7550614674886065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,5.222261428833008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,4.110154787699382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,5.015834808349609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,3.87770144144694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,4.902592023213704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,5.115845362345378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,5.09340254465739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,3.908672014872233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,2.5448853174845376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,3.895327885945638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,5.217093467712402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,2.5807627042134604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,5.187909444173177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,2.300543944040934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,2.489429314931234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,2.472106615702311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,2.112757364908854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,2.610736052195231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,2.5037387212117515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,2.306309382120768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,2.5197013219197593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,2.5607892672220864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,2.1337226231892905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,8.896944046020508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,8.960197448730469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,11.398816426595053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,11.469056447347006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,11.794682820638021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,11.777754465738932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,11.743595123291016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,6.126869201660156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,4.83021863301595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,6.136453628540039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,6.037919998168945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,8.980304082234701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,5.81765874226888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,11.956843058268229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,4.557125409444173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,5.874410629272461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,6.002261479695638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,4.497935930887858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,5.92466672261556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,3.0223573048909507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,4.520144144694011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,5.850543975830078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,3.1972265243530273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,2.5710719426472983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,2.8282238642374673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,2.3954720497131348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,2.848069190979004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,3.0474398930867515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,2.4062347412109375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,2.877504030863444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,2.937082608540853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,2.912090619405111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,1.6031786600748699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,1.3592640558878581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,2.364522616068522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,1.708661397298177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,1.520095984141032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,1.5530079205830891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,1.5409653981526692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,1.5364960034688313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,1.5187733968098958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,1.310655991236369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,1.5330880482991536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,1.5346186955769856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,1.366597334543864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,6.393119812011719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,6.371893564860026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,8.315381368001303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,8.277530670166016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,8.300048192342123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,8.270122528076172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,8.417253494262695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,4.428768157958984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,4.519839922587077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,3.536282539367676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,4.134245236714681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,4.206474622090657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,6.494869232177734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,8.341594696044922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,3.3038291931152344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,4.261647860209147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,3.3113813400268555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,4.186837196350098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,4.273381233215332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,4.334383964538574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,2.0914079348246255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,3.3270400365193686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,1.8149174054463704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,2.23415470123291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,2.0828000704447427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,1.816197395324707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,2.1329174041748047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,2.1226399739583335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,2.035600026448568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,1.7359840075174968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,2.1328479448954263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,1.1817866961161296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,2.1581385930379233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,1.7452692985534668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,1.1956000328063965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,1.0850186347961426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,1.1079786618550618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,1.2054026921590169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,1.009071985880534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,1.1240746974945068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,1.1159040133158367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,1.019034703572591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,1.1270506381988525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,1.1145599683125813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,1.033509333928426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,8.52181879679362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,8.651354471842447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,11.140815734863281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,11.259573618570963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,11.211551666259766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,11.075300852457682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,11.177210489908854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,4.7062028249104815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,5.916922887166341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,6.0993226369222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,5.550640106201172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,5.687498728434245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,8.858650843302408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,11.292699178059896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,4.307135899861653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,5.886597315470378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,4.363594690958659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,5.508629480997722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,5.678080240885417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,4.354672114054362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,5.5100962320963545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,2.3941173553466797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,2.919487953186035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,3.0611254374186196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,2.824122746785482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,2.7214988072713218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,2.2200640042622886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,2.777023951212565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,2.3239146868387857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,2.7750399907430015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,2.7426506678263345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,2.321920077006022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,1.4446293512980144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,2.7383413314819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,1.2695306936899822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,1.4892746607462566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,1.5260106722513835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,1.4798240661621094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,1.2069813410441081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,1.392917315165202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,1.4191306432088215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,1.2039946715037029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,1.4239519437154133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,0.8053759733835856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,1.4126346906026204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,1.275391976038615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,0.821237325668335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,0.7562399705251058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,0.7835573355356852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,0.7844586372375488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,0.6942240397135416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,0.7884746392567953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,0.791968027750651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,0.6982453664143881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,0.7878666718800863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,0.7927359739939371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,0.7023839950561523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,5.229728062947591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,5.219312032063802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,6.58409055074056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,6.594448089599609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,6.572954813639323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,6.713413238525391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,6.753194808959961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,3.5637334187825522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,3.533114751180013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,2.988581339518229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,3.215632120768229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,3.281071980794271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,5.314661343892415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,6.798885345458984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,2.6696106592814126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,2.6994825998942056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,3.3538986841837564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,3.382335980733236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,3.2924372355143228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,3.4069973627726235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,1.7437814076741536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,2.728762626647949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,1.724522590637207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,1.600671927134196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,1.6723039944966633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,1.6477386156717937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,1.4403947194417317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,1.6630667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,1.6492692629496257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,1.4414292971293132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,0.9362133344014486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,1.631333351135254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,1.6925066312154133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,1.5248586336771648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,0.9576906363169352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,0.8764373461405436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,0.8690400123596191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,0.8883146444956461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,0.7661226590474447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,0.9278613726298014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,0.8907787005106608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,0.7974026997884115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,0.9013706843058268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,0.8796213467915853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,0.522271990776062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,0.777733325958252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,0.5354826847712199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,0.4801013469696045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,0.5070079962412516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,0.5007946491241455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,0.5157279968261719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,0.4694186846415202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,0.5052533149719238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,0.45497600237528485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,0.5059039990107218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,0.512666662534078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,0.4591413338979085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,5.350298563639323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,6.892800013224284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,5.365861256917317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,6.732378641764323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,6.809882481892903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,6.730096181233724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,3.6563841501871743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,2.9755681355794272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,3.675269444783529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,3.3604211807250977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,5.419562657674153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,6.7729441324869795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,6.910085042317708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,2.6844587326049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,3.3301013310750327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,3.2218348185221353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,3.391599973042806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,2.69648011525472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,3.304197311401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,1.792784055074056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,1.594549338022868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,1.7821332613627117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,2.727813402811686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,3.4431788126627603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,1.642357349395752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,1.424997329711914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,1.6440587043762207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,1.6393973032633464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,1.6327199935913086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,1.3956586519877117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,1.7383413314819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,1.7296640078226726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,0.9236586888631185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,0.9388426939646403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,1.4266239802042644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,0.8277440071105957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,0.8488639990488688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,0.875338633855184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,0.7654453118642172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,0.8638186454772949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,0.8617226282755533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,0.7478559811909994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,0.8588159879048666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,0.8641440073649088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,0.5027039845784506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,0.7610452969868978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,0.5120586554209391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,0.4549866517384847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,0.4684160153071086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,0.4726346731185913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,0.4214560190836589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,0.4736693302790324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,0.4768746693929036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,0.4768799940745036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,0.4217386643091838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,0.47820266087849933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,0.42608535289764404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,0.2991466720898946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,0.30291199684143066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.27724266052246094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,0.27910399436950684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,0.28068800767262775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,0.27908267577489215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.2560960054397583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,0.2783893346786499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.25837866465250653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,0.285045325756073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,0.28356266021728516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.26020266612370807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,3.416522661844889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,4.245402654012044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,4.206816037495931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,3.440704027811686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,4.1452639897664385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,4.19049612681071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,4.2693227132161455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,2.2374560038248696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,2.2935733795166016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,1.9771092732747395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,2.056309382120768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,2.03439998626709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,3.505413373311361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,4.3029225667317705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,1.8021653493245442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,2.07097593943278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,1.7995840708414714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,2.0176587104797363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,2.1141494115193686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,1.8389013608296711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,1.1370399792989094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,2.0868746439615884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,1.017743984858195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,1.189733346303304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,1.0506400267283122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,1.0515466531117756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,0.9053066571553549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,1.0429973602294922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,1.0742080211639404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,0.9099360307057699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,1.0670613447825115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,1.06168532371521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,0.5991946856180826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,0.9325173695882162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,0.6210346619288126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,0.5430933237075806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,0.5664000113805135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,0.5557440121968588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,0.4896373351414998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,0.557429313659668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,0.5650453170140585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,0.49400532245635986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,0.5660800139109293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,0.5647199948628744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,0.33852799733479816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,0.49726935227711994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,0.34486401081085205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,0.31012266874313354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,0.309717337290446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.28359466791152954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,0.3151893417040507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,0.31108800570170086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,0.3125973343849182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.2843466599782308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.2883146603902181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,0.3182026743888855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,0.3203199903170268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.20773333311080933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.21180800596872965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.19243733088175455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.19321600596110025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.17693867286046347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.1936053236325582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.19352533419926962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.19371734062830606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.17713600397109985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.19571733474731445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.1787359913190206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,3.7799253463745117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,4.561727841695149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,4.564106623331706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,3.793914794921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,4.5558827718098955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,4.5058291753133135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,4.62822945912679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,2.5123573939005532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,2.5670347213745117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,2.166656017303467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,2.210949261983236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,2.23471466700236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,3.849013328552246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,4.5979359944661455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,1.9572319984436035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,1.8918453852335613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,2.215178648630778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,2.1901067097981772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,2.262239933013916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,2.2622933387756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,1.9418187141418457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,1.2507466475168865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,1.2525386810302734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,1.113264004389445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,1.1204266548156738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,1.136181354522705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,0.9701973597208658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,1.112831989924113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,0.9721653461456299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,1.1320106983184814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,1.1320373217264812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,1.1375253200531006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,0.6457279920578003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,0.994101365407308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,0.6575413147608439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,0.580736001332601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,0.5868106683095297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,0.5112959941228231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,0.5854560136795044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,0.5905653238296509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,0.5881813367207845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,0.5151040156682333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,0.5909706751505533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,0.599183996518453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,0.35049064954121906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,0.5237226486206055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,0.35835735003153485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,0.31804267565409344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,0.31833066542943317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,0.31795734167099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.2845653295516968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,0.3222133318583171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,0.3240000009536743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.28723732630411786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,0.3258933424949646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,0.32677332560221356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.28959999481836957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.2043573260307312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.2085919976234436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.1877866586049398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.18385599056879678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.18275733788808188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.18389334281285605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.16874132553736368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.18757333358128866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.1662986675898234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.18291733662287393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.1879733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.12713066736857095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.17308266957600912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.11957333485285442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.12925333778063455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.1216373344262441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.11958400408426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.11134933431943257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.11998933553695679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.1199679970741272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.11955733100573222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.11213866869608562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.12134933471679688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,2.5350826581319175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,2.964954694112142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,2.9462881088256836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,2.9883572260538735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,2.551637331644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,2.9686880111694336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,3.0043999354044595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,1.6711947123209636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,1.7022666931152344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,1.4699467023213704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,1.4750080108642578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,1.4762506484985352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,2.590165297190348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,2.9992478688557944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,1.269429365793864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,1.5062665939331055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,1.2825760046641033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,1.4856692949930828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,1.499285380045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,0.8526453177134196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,1.5055893262227376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,1.3087573051452637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,0.7567520141601562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,0.866159995396932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,0.7568906943003336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,0.7606773376464844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,0.6616373459498087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,0.7610719998677572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,0.7671626408894857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,0.6647679805755615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,0.7710239887237549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,0.7783573468526205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,0.6772533257802328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,0.447376012802124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,0.45453866322835285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,0.40414400895436603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,0.4012480179468791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,0.40405865510304767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,0.4065706729888916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,0.3535733222961426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,0.4102240006128947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,0.4042559862136841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,0.3551093339920044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,0.41234668095906574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,0.3625280062357585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,0.2532320022583008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,0.2508693337440491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.22462934255599976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.21938133239746094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.2241333325703939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.20163732767105103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.22672533988952637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.22233066956202188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.19932266076405844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.22209600607554117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.22946133216222128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.20475733280181885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.14710932970046997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.15035733580589294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.13588800032933554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.1290826698144277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.12990933656692505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.11597333351771037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.13012267152468363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.12057600418726604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.09360532959302266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.13288000226020813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.11726400256156921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.1318186620871226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.13160000244776407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.09559466441472371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.08968533078829448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.09032000104586284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.08974400162696838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.08266133566697438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.09101866682370503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.08986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.08264533181985219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,3.1306241353352866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,2.881861368815104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,3.124095916748047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,3.202255884806315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,3.2255147298177085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,3.079648017883301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,1.8438506126403809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,1.8068639437357585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,1.7022026379903157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,3.0953601201375327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,3.3800106048583984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,3.367717425028483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,1.579114596048991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,1.595647970835368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,1.4339307149251301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,1.5911626815795898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,1.4595200220743816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,1.593781312306722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,1.6295626958211262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,0.9283200105031332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,0.9029973347981771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,1.615429401397705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,0.8619093100229899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,0.7975680033365885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,1.558143933614095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,0.7980159918467203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,0.7276586691538492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,0.8071946303049723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,0.8038773536682129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,0.7325812975565592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,0.8218293190002441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,0.4761173327763875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,0.4647039969762166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,0.8157386779785156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,0.750485340754191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,0.44304001331329346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,0.4087466796239217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,0.4119733174641927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,0.4169280131657918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,0.36660265922546387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,0.4157866636912028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,0.37723731994628906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,0.42060800393422443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,0.4199306567509969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,0.3840906620025635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,0.25307732820510864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,0.2456373373667399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,0.23355732361475626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.21772799889246622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.22197866439819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.21927465995152792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.1943733294804891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.22304532925287882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.20150399208068848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.2241119941075643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.22426132361094156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.2050293286641439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.1423679987589518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.1373973290125529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.13180266817410788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.12036266922950745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.12016000350316365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.10518399874369304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.12229866782824199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.1206773320833842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.12198399504025777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.10934399565060933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.12298666437466939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.11571733156840007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.07937066753705342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.07111999889214833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.07034666836261749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.07189866900444031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.06300800045331319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.07222933570543925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.07047466437021892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.0629120022058487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.0521066685517629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.05234666665395101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.04711999992529551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.05061866839726766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.04423999786376953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.05054933329423269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.0440533310174942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.05048533280690511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.04985600213209788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,2.6828959782918296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,2.680095990498861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,2.4724586804707847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,2.7082560857137046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,2.7295252482096353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,2.6621386210123696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,2.9060532251993814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,1.5829173723856609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,1.5512480735778809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,1.5
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,1.354490598042806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,2.6914345423380532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,2.912501335144043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,1.350554625193278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,1.2329813639322917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,1.3657973607381184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,1.3664587338765461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,1.2752532958984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,1.3977333704630535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,0.8041120370229086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,0.7846773465474447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,1.3881440162658691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,0.7559146881103516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,1.3569119771321614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,0.6833066940307617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,0.6839146614074707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,0.6239306529362997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,0.6905972957611084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,0.6908373037974039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,0.6298026641209921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,0.7032106717427572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,0.41539200146993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,0.7029759883880615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,0.3883253335952759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,0.643178661664327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,0.351365327835083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,0.4036053419113159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,0.35132265090942383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.3134400049845378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,0.3566133181254069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,0.3565760056177775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,0.32954132556915283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,0.21897600094477335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.32261866331100464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,0.3616480032602946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,0.36081600189208984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,0.21356266736984253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.1869759956995646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,0.20543466011683145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.1872373421986898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.1667520006497701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.18915732701619467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.18932799498240152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.17188799381256104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.19298134247461954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.19300800561904907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.175546665986379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.12297067046165466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.12113066514333089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.10244266192118327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.1030560036500295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.09090133508046468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.10319999853769939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.10386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.09442133704821269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.06785066425800323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.1053546667098999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.10699733098347981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.09874133268992107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.0699786643187205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.06806933383146922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.06202666461467743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.05276800195376078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.06216000020503998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.0537066658337911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.043968002001444496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.045509333411852516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.045456002155939736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.043738668163617454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.04448533554871877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,1.2217546304066975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,1.2213599681854248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,1.125050703684489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,1.2320640087127686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,1.2326080004374187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,1.1707359949747722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,1.2653706868489583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,0.7341492970784506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,1.2541173299153645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,0.7147413094838461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,1.2489759922027588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,0.7015519936879476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,0.6158080101013184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,0.6154719988505045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,0.5636586745580038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,0.6239840189615885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,0.6262506643931071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,0.5721706549326578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,0.6369706789652506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,0.6338826815287272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,0.37940800189971924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,0.5868586699167887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,0.3709973494211833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,0.36246931552886963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,0.31989334026972455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.2839413285255432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,0.31877867380777997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,0.3245866696039836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,0.32496533791224164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.2956266601880391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,0.3292693297068278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,0.3283466696739197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,0.19953600565592447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.3001226584116618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,0.1955733299255371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.19050665696461996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.16808533668518066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.1687999963760376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.15027733643849692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.16976000865300497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.17076265811920166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.11384532848993938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.15480533242225647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.17433067162831625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.17249067624409994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.15997866789499918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.11134933431943257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.10822932918866475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.09327999750773112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.09450667103131612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.08230400085449219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.09476799766222636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.06420266628265381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.09344533085823059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.08613866567611694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.09670933087666829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.0631520003080368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.05663999915122986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.057775999108950295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.05778133372465769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.04956800242265066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.058042665322621666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.049600000182787575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.03602133442958196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.040661332507928215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.04089066634575526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.027850667635599773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,0.673472007115682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,0.6724747021993002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,0.6212426821390787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,0.6815573374430338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,0.6822880109151205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,0.6226720015207926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,0.6946986516316732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,0.6377280155817667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,0.6930239995320638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,0.4043360153834025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,0.38445866107940674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,0.39528000354766846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,0.34486401081085205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,0.34352533022562665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.3070613344510396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,0.3490560054779053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,0.34904531637827557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.318938672542572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,0.35436801115671795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,0.3526080052057902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.32473599910736084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,0.21241599321365356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,0.20787199338277182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,0.20118399461110434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.18263999621073404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.18147200345993042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.16416000326474509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.18681599696477255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.18389334281285605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.18412800629933676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.16872000694274902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.18779732783635458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.17196800311406454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.11458667119344075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.11318399508794148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.09734933574994405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.08851733803749084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.09828266501426697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.08624000350634257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.09897599617640178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.09880533814430237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.10109333197275798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.10081066687901814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.09261332949002583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.062218666076660156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.06585066517194112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.056741332014401756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.057802667220433555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.05101866523424784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.05762133498986562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.057562669118245445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.05783999959627787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03845333307981491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.038746667404969536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.038746667404969536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.02890666574239731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.03018666555484136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.02887466549873352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.02037866661945979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.020410666863123577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,0.44043199221293133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,0.4400800069173177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.3980640172958374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,0.4465493361155192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,0.4460800091425578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.40832531452178955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,0.45021867752075195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,0.4493173360824585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,0.2587520082791646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,0.2526986598968506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.4150559902191162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,0.2442506750424703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.2280799945195516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.22808533906936646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.23050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.2315946618715922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.20751466353734335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.21354132890701294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.23423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.23265600204467773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.2157599925994873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.1394719978173574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.13591999808947244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.13447466492652893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.12364266316095988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.12330133716265361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.12327466408411662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.10938133796056111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.12331733107566833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.11290132999420166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.12541332840919495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.1255626678466797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.0682239979505539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.07414933542410533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.07446933289368947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.07375999788443248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.06800533334414165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.06861333549022675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.06820266445477803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.06763199965159099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.06811200082302094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.06057066718737284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.04353600243727366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.04576533536116282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.03869866579771042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.0430026650428772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.02995733420054118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.043712000052134194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.030453334252039593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.027930667002995808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.01782400036851565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.3265013297398885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.3243359923362732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.32789866129557294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.3024853269259135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.3282453417778015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.308079997698466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.331061323483785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.18571199973424277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.3302239974339803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.3118346730868022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.1835306684176127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.18163732687632242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.1718026598294576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.17071467638015747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.1562026639779409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.1716746687889099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.17223467429478964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.15942933162053427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.1729066570599874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.1727893352508545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.16368533174196878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.09885332981745402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.09874666730562846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.09891200065612793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.0925600032011668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.09191466371218364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.09341866771380107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.0922986666361491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.08453333377838135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.0851093331972758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.09338666995366414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.09403733412424724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.08469866712888081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.05448000133037567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.055999999245007835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.05606399973233541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.05345066885153452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.05372266471385956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.05468800167242686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.05421333511670431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.04958933095137278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.034629332522551216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.03399466723203659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.26683733860651654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.2669279972712199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.2530133326848348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.2677599986394246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.2693546613057454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.26683733860651654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.2569013237953186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.26743467648824054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.14646400014559427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.25896533330281574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.14657599727312723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.14756266276041666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.13921067118644714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.13985600074132284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.13251733779907227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.13157332936922708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.14119999607404074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.13985066612561545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.1407360037167867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.1397760013739268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.13301866253217062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.08037333190441132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.0787306676308314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.07901333272457123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.0790826678276062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.07845866680145264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.07467733323574066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.07918400069077809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.0465280016263326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.07448000212510426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.048810665806134544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.045514668027559914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.04614399870236715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.04629333317279816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.044266665975252785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.030192000170548756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,11.982410430908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,11.985445658365885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,15.33675765991211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,15.458763122558594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,15.54425048828125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,15.4378293355306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,15.34646987915039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,8.307477315266928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,12.276570638020834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,8.325178782145182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,16.020511627197266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,12.301483154296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,16.019444783528645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,15.824751536051432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,6.339290618896484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,7.962111790974935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,7.77244249979655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,6.006528218587239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,7.92303466796875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,6.13700803120931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,7.998421351114909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,7.835898717244466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,6.034101486206055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,7.931856155395508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,7.884298960367839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,4.09991455078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,4.194682757059733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,3.304234822591146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,7.969658533732097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,6.300591786702474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,4.034976005554199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,3.1646505991617837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,3.854976018269857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,3.7919254302978516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,3.162266731262207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,3.946448008219401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,4.199514706929524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,3.9758987426757812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,3.1152159372965493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,2.0658507347106934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,2.1243413289388022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,4.142458597819011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,3.1422293980916343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,4.108864148457845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,1.910373369852702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,2.0310239791870117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,2.003994623819987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,1.82585604985555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,1.9892640113830566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,1.9826186498006184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,1.885493278503418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,1.9740746815999348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,2.032048066457113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,1.7492480278015137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,2.089285373687744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,1.8209919929504395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,1.9981172879536946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,7.112746556599935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,9.167488098144531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,9.2118771870931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,7.060480117797852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,9.016288121541342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,9.167365392049154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,9.362725575764975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,5.064682642618815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,7.154186884562175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,4.730405489603679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,9.402458826700846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,7.274074554443359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,9.322170893351236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,9.494906743367514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,4.008565266927083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,4.496629397074382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,4.577989260355632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,3.6043628056844077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,4.798447926839192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,3.666639963785807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,4.685759862263997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,4.66484260559082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,3.6553173065185547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,4.549423853556315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,4.593008041381836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,3.6405601501464844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,4.792853355407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,2.4095147450764975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,2.433509349822998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,2.382746696472168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,2.2730773289998374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,2.5170987447102866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,2.078810691833496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,2.2855146725972495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,1.8993066151936848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,2.3107946713765464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,2.2495786348978677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,1.9654080073038738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,2.2223307291666665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,1.3814293543497722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,2.263381322224935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,1.9166666666666667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,2.4428586959838867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,1.3697439829508464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,1.1062239805857341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,1.3023306528727214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,1.290560007095337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,1.2159573237101238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,1.2141706943511963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,1.3407893180847168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,1.0743146737416585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,1.2147093613942463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,1.2315359910329182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,1.1050506432851155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,1.229530652364095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,1.2224266529083252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,1.1297813256581624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,5.077808062235515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,5.0355574289957685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,6.4846343994140625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,6.516288121541341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,6.560362497965495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,6.700944264729817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,6.633792241414388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,3.308992067972819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,3.550095876057943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,5.163685480753581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,6.688234965006511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,5.126757303873698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,6.614202499389648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,6.662341435750325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,2.862250645955404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,3.353290557861328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,3.2358185450236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,2.6497813860575357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,3.2256479263305664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,2.622133255004883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,3.3493439356486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,3.1253493626912436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,2.7219359079996743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,3.361845334370931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,3.386757214864095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,1.7253492673238118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,2.6235626538594565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,3.423247973124186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,1.7437920570373535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,1.4795145988464355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,1.7659626007080078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,1.460693359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,1.883306662241618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,1.6096107165018718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,1.4819787343343098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,1.8446933428446453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,1.6620799700419109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,1.4090347290039062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,1.6862506866455078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,1.6307679812113445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,1.6588266690572102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,1.0002346833546956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,1.4173280398050945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,1.0787413120269775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,0.8324106534322103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,0.9469546476999918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,0.9047253131866455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,0.9369386831919352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,0.9102986653645834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,0.8951413631439209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,0.8686400254567465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,0.899247964223226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,0.9317440191904703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,0.8021066983540853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,0.9102400143941244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,0.9153493245442709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,0.8049600124359131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,6.876527786254883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,6.9567413330078125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,8.65446917215983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,8.666666666666666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,8.631146748860678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,8.817871729532877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,8.799872080485025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,4.7193654378255205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,7.046143849690755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,8.917664210001627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,7.078218460083008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,8.972442626953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,9.08459726969401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,3.769589424133301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,4.899157206217448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,4.447386741638184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,3.4092747370402017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,4.372485478719075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,4.493882815043132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,4.4074398676554365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,3.4598719278971353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,4.355754534403483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,3.4333972930908203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,4.391466776529948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,4.456682523091634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,2.3353706995646157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,1.8978986740112305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,3.5017760594685874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,2.4148213068644204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,4.396986643473308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,2.119919935862223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,2.1354026794433594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,1.9896853764851887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,2.136064052581787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,2.102506637573242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,1.9095679918924968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,2.1325759887695312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,1.8935519854227703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,2.136511961619059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,2.183274745941162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,1.1619679927825928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,1.2269919713338215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,1.8388853073120117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,1.0440320173899333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,2.1077653566996255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,1.243839979171753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,0.9732480049133301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,1.1508373419443767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,1.1216053167978923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,1.1134080092112224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,0.9754400253295898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,1.1316266854604085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,0.9768053690592448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,1.1169280211130779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,1.1449120044708252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,0.6548106670379639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,1.133903980255127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,0.9852853616078695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,0.6866772969563802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,0.5915306806564331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,0.6369226773579916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,0.6490240097045898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,0.6276799837748209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,0.564789334932963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,0.6261173486709595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,0.5727946758270264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,0.628549337387085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,0.6322666803995768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,0.5778559843699137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,0.6348533233006796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,0.6387360095977783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,0.5731893380482992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,4.159637451171875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,4.192394574483235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,5.259909311930339
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,5.227109273274739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,5.243706703186035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,5.229786554972331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,5.265754699707031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,2.660266717274984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,2.768207867940267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,4.231797218322754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,5.126495997111003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,4.22162659962972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,5.212944030761719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,5.407034556070964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,2.403589407602946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,2.4821653366088867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,2.6086506843566895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,2.3532800674438477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,2.5456533432006836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,2.302581310272217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,2.5007893244425454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,2.621631940205892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,2.1998400688171387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,2.6152426401774087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,2.525935967763265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,1.366645336151123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,2.582245349884033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,2.157477378845215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,1.556074619293213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,1.218069314956665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,1.4416693051656086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,1.1231359640757244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,1.3599626223246257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,1.3353652954101562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,1.291376034418742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,1.1832746664683025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,1.3043306668599446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,1.2848587036132812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,1.1359626452128093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,1.329312006632487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,1.311791976292928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,0.7415839831034342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,1.141482671101888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,0.6616266568501791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,0.748522679011027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,0.7062346935272217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,0.6973439852396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,0.6207733154296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,0.6960000197092692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,0.6950026353200277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,0.635045329729716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,0.7068266868591309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,0.7010080019632975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,0.6246560017267863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,0.7048052946726481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,0.7054239908854166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,0.6435946623484293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,0.4254186550776164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,0.39297600587209064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,0.42683200041453045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,0.39774401982625324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,0.3991146485010783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,0.37190401554107666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,0.40194133917490643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,0.40275200208028156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,0.40252800782521564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,0.40300798416137695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,0.36987733840942383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,0.3722933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,0.4103786547978719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,0.4122186501820882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,0.3775093158086141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,4.249130566914876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,5.247770627339681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,4.284522692362468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,5.300917307535808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,5.198080062866211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,5.285029411315918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,5.291248003641765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,2.800858815511068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,2.7769546508789062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,5.319120089213054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,4.331706682840983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,4.346298535664876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,5.428255716959636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,5.442362467447917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,2.523695945739746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,2.5328426361083984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,2.138000011444092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,2.536757310231527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,2.4704906145731607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,2.220656077067057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,2.5864426294962564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,2.550538698832194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,2.1625493367513022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,2.574949264526367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,1.4183146158854167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,2.5746827125549316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,2.5377599398295083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,2.1908960342407227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,1.2290826638539631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,1.430906613667806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,1.2758879661560059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,1.3158186276753743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,1.1437760194142659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,1.1181386311848958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,1.2773866653442383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,1.2686879634857178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,1.2999520301818848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,1.1339946587880452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,1.288256009419759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,1.3000906308492024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,0.7199520270029703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,0.7426400184631348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,1.2940373420715332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,1.1374186674753826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,0.6548320055007935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,0.6724426746368408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,0.6700692971547445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,0.5980319976806641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,0.6912906964619955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,0.6834080219268799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,0.6026560068130493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,0.6805760065714518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,0.6812853018442789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,0.6069066524505615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,0.6790826320648193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,0.402511994043986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,0.6938239733378092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,0.6235733429590861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,0.4087040026982625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,0.3697226842244466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,0.37149866422017414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,0.3729066848754883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,0.3744693199793498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,0.34303466478983563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,0.34462932745615643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,0.37060264746348065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,0.37781866391499835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,0.37884267171223956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,0.3805226484934489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,0.34661865234375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,0.3840533494949341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,0.3480693499247233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,0.24378132820129395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,0.24731733401616415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.22945600748062134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.2107893427213033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.22670400142669678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.22579733530680338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.21015999714533487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.22671999533971152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.22723732391993204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.22760534286499023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.22770132621129355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.23031467199325562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.21100266774495444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.22972800334294638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.21562665700912476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,3.115349451700846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,2.720143953959147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,3.298346519470215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,2.7287092208862305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,3.267056147257487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,3.146906534830729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,3.283402760823568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,2.750960032145182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,1.7599892616271973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,3.249354680379232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,3.3911574681599936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,2.8067518870035806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,1.7947039604187012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,3.2959839502970376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,1.5467626253763835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,1.566549301147461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,1.5950080553690593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,1.4516372680664062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,1.5731253623962402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,1.406826655069987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,1.6377013524373372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,1.5821545918782551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,1.59225066502889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,1.4466506640116374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,0.9132800102233887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,1.6119573911031086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,1.4169813791910808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,1.6320373217264812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,0.9075360298156738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,0.818021297454834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,0.8086613019307455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,0.8244533538818359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,0.7869653701782227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,0.8229173024495443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,0.8270666599273682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,0.7266240119934082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,0.8173226515452067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,0.8202880223592123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,0.7326613267262777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,0.8469706376393636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,0.4761333465576172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,0.8443146546681722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,0.7492427031199137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,0.4384160041809082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,0.48342398802439374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,0.43672533830006915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,0.43877331415812176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,0.3959999879201253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,0.43907733758290607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,0.439029335975647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,0.39822932084401447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,0.43983999888102215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,0.4434560139973958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,0.40170133113861084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,0.40482668081919354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,0.44553065299987793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,0.4514293273289998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,0.2708746592203776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,0.2762719988822937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.2305013338724772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,0.2534666657447815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,0.24623467524846396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,0.24654932816823324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,0.2473706603050232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,0.24738667408625284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.23207465807596842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,0.25464532772699994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,0.24991466601689658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,0.24938132365544638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.2342080076535543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.160863995552063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,0.25577600797017414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.1572426656881968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.23639466365178427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.1686826745669047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.17092265685399374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.1574026644229889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.1483573317527771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.1572426656881968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.1564533313115438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.1476426621278127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.15773866573969522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.1583146651585897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.1476906637350718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.1581760048866272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.15854400396347046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.14733866850535074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,2.9873387018839517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,3.4291254679361978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,3.5553067525227866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,3.523632049560547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,3.0106452306111655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,3.5312105814615884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,1.9226346015930176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,3.040074666341146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,3.546741485595703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,3.519498825073242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,3.6173601150512695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,3.0882078806559243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,3.6358985900878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,1.9243466059366863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,1.7184906005859375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,1.6915574073791504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,1.7159360249837239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,1.5220746994018555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,1.6918667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,1.7456159591674805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,1.5194133122762044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,1.7080480257670085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,1.565008004506429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,1.7351412773132324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,1.7596319516499836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,1.7508533795674641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,1.5792427062988281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,0.9677226543426514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,0.9863253434499105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,0.8878080050150553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,0.8781599998474121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,0.8731839656829834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,0.7897066275278727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,0.8775359789530436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,0.871397336324056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,0.7793280283610026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,0.872053305308024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,0.886735995610555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,0.7842666308085123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,0.8894026279449463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,0.9004053274790446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,0.5069493452707926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,0.5166826645533243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,0.7956373691558838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,0.47117865085601807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,0.45365333557128906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,0.4537546634674072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,0.4110293388366699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,0.45812265078226727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,0.45757333437601727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,0.4145760138829549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,0.4613066514333089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,0.46325333913167316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,0.47145601113637287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,0.4164373477300008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,0.46751999855041504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,0.4241386651992798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,0.2769813338915507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,0.28383467594782513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,0.26024534304936725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,0.2505066593488057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.24785600105921426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,0.24923733870188394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.23005332549413046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.24912534157435098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.23202133178710938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,0.2537386616071065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,0.25331199169158936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.23451199134190878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.1680906613667806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,0.25685866673787433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,0.2592800060908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.23691733678181967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.16473600268363953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.1553546686967214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.1441973348458608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.14550399780273438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.133242666721344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.1458560029665629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.13401599725087485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.14645333091417947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.14652799566586813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.10364266236623128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.13741866747538248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.1492853363355001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.15090133746465048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.1418719987074534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.10526399811108907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.10073600212732951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.09982400139172871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.09948800007502238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.09940800070762634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.09476799766222636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.10075199604034424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.09327999750773112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.09917866190274556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.09909866253534953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.09358400106430054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.09935466448465984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.10158933202425639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.09497066338857015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,2.262767950693766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,2.0023040771484375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,2.2664106686909995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,2.2849173545837402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,2.015018622080485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,2.2546133995056152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,2.305370648701986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,2.035210609436035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,2.290448029836019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,2.408895969390869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,2.076533317565918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,2.3470613161722818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,1.289024035135905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,1.1753599643707275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,1.1258506774902344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,1.3276480038960774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,1.0092213153839111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,1.1503520011901855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,1.1404799620310466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,1.1401279767354329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,1.0143253008524578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,1.1407093207041423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,1.0263946851094563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,1.1628906726837158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,1.180725336074829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,1.1772853533426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,1.0510506629943848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,0.660261352856954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,0.6704906622568766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,0.6080319881439209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,0.5884319941202799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,0.5868959824244181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,0.5861119826634725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,0.526042660077413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,0.587061325709025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,0.5271093448003134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,0.5883200168609619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,0.596837321917216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,0.5331893364588419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,0.6006186803181967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,0.6076693137486776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,0.3489813407262166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,0.5457386573155721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,0.3553493420283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,0.327349325021108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,0.30883200963338214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,0.31083200375239056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.28541866938273114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,0.31098665793736774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,0.3123626708984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.28935466210047406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.2847893238067627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,0.3142506678899129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,0.3171253403027852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,0.32055999835332233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,0.32315733035405475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,0.2940426667531331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.19748266537984213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.1711733341217041
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.19942933320999146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.17142399152119955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.1848533352216085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.17149867614110312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.16266666849454245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.1737119952837626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.16274133324623108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.1737013260523478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.17484800020853677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.16429866353670755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.18026133378346762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.181167999903361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.1691946585973104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.11718400319417317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.11953066786130269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.11341866850852966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.10424533486366272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.10543466607729594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.09553066889444987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.10475732882817586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.1060693363348643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.09701866904894511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.10462400317192078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.10724266370137532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.09706667065620422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.10746133327484131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.107013334830602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.09912000099817912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.07613866527875264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.07262933254241943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.07449066638946533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.07421866556008656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.07011199990908305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.07458133498827617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.07030933101971944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.0745119998852412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.07425066828727722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.0701333334048589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.07018133501211803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,2.362698713938395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,2.354848066965739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,2.2335306803385415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,2.433909257253011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,2.443461259206136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,2.42575470606486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,2.440432071685791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,2.4671947161356607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,2.4522347450256348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,2.5429760615030923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,2.58244260152181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,2.626490592956543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,1.436090628306071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,1.4256480534871419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,1.3589760462443035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,1.1988426844278972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,1.2354186375935872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,1.136565367380778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,1.2176319758097331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,1.2109920183817546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,1.2025813261667888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,1.219002644220988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,1.2341972986857097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,1.2076693375905354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,1.2544639905293782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,1.2582773367563884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,0.7111413478851318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,0.7000853220621744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,0.6069333155949911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,0.68777068456014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,1.3022879759470622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,0.6039733489354452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,0.5762346585591634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,0.615120013554891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,0.6191840171813965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,0.5972319841384888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,0.6246933142344157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,0.6231199900309244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,0.5950933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,0.6376159985860189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,0.6338826815287272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,0.3673226833343506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,0.6455519994099935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,0.3596320152282715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,0.31493866443634033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,0.35473068555196124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,0.3152959942817688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.29847999413808185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,0.31996800502141315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,0.3209226727485657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.30484267075856525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,0.3234986662864685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,0.32177066802978516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,0.309663991133372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,0.3304640054702759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,0.3299573262532552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,0.31388266881306964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,0.19721599419911703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,0.19374932845433554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.18927466869354248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.16887466112772623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.16981865962346396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.1581706702709198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.17053866386413574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.17075733343760172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.16164799531300864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.17297067244847616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.17223467429478964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.1677280068397522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.1653439998626709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.17695466677347818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.1755680044492086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.11052266756693523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.1076586643854777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.09460799892743428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.09427733222643535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.0865066647529602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.09305066863695781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.09542933106422424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.08685866991678874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.09331732988357544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.08910399675369263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.09807466467221577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.09756799538930257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.0645546664794286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.060346667965253196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.05576533575852712
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.06102400024731954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.06186666587988535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.06052800019582113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.05653333167235056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.037903999288876854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.03806933263937632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.0369759996732076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.037178667883078255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.03629866739114126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.03659733384847641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,2.000293254852295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,1.9982560475667317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,1.9125067392985027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,2.057941277821859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,2.1156907081604004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,2.0901546478271484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,2.0844640731811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,2.100506623586019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,2.134394645690918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,1.2300586700439453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,2.138751983642578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,2.138437271118164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,2.3061013221740723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,1.2161493301391602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,1.0106080373128254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,1.198800007502238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,1.0099093119303386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,0.966538667678833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,1.03002134958903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,1.0358346303304036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,1.0374613602956135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,1.0407413641611736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,1.0461333592732747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,1.0520533720652263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,1.0731840133666992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,1.0654346942901611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,0.612069328625997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,0.6031946738560995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,1.1387946605682373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,0.6010026534398397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,0.5169599850972494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,0.49536534150441486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,0.515717347462972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,0.5260586738586426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,0.5278186798095703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,0.5136533180872599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,0.529146671295166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,0.5314079920450846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,0.5174826780954996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,0.5462719996770223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,0.5416266520818075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,0.31061333417892456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,0.3157973289489746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,0.558784008026123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.25493866205215454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,0.3092586596806844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.2688000003496806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.27110934257507324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,0.27146132787068683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,0.2706186572710673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.26526399453481037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.26367467641830444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,0.28276799122492474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,0.27292799949645996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,0.2741653323173523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,0.28275199731191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.26921600103378296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,0.1709386706352234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.16620266437530518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.16701332728068033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.1431946655114492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.14332266648610434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.13582932949066162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.14593066771825156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.14587199687957764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.13955733180046082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.14748266339302063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.14666666587193808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.1411146620909373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.15134933590888977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.15196266770362854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.09257066249847412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.14435199896494547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.09471467137336731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.09278399745623271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.07831466694672902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.07831466694672902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.07431999842325847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.07205333312352498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.07533866663773854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.0820906658967336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.08262933293978374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.07858133316040039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.051216001311937966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.05143466591835022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.04985600213209788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.04953599969546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.04774933556715647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.031925333042939506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.03214933226505915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,0.9121493498484293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,0.909279982248942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,0.8729813098907471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,0.9249599774678549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,0.9249973297119141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,0.937274694442749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,0.940602699915568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,0.938975969950358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,0.9638613065083822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,0.9789600372314453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,0.5571680068969727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,0.5450826485951742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,0.9706186453501383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,0.5559999942779541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,0.4654293457667033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,1.0583199659983318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,0.4650239944458008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,0.4452373186747233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,0.4693066676457723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,0.4713333447774251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,0.46649599075317383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,0.47860264778137207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,0.4776800076166789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,0.46880535284678143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,0.4940799872080485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,0.49190934499104816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,0.29050666093826294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,0.5189973513285319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,0.2847786744435628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,0.28910932938257855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.24220800399780273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.24195732673009238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.22976533571879068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.24288000663121542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,0.24566400051116943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.24186132351557413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.23763734102249146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,0.24762133757273355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.2389706571896871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,0.25645333528518677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,0.25468266010284424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.24730666478474936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,0.15440533558527628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.15178133050600687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.15315733353296915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.12728533148765564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.1269546647866567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.1199679970741272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.12914666533470154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.1279039978981018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.1244053343931834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.13212266564369202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.1307253340880076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.12777066230773926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.13505599896113077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.13502933581670126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.12989333271980286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.08783466617266338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.08447466293970744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.08682666222254436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.07223999996980031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.07203733424345653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.0668213317791621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.0724373310804367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.07407466570536296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.07314666608969371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.07275733351707458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.0680213322242101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.07557866473992665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.07344533503055573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.043968002001444496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.04641066491603851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.04266133407751719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.02977599948644638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.02888533224662145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.020261333634455998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,0.5094506740570068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,0.5076266527175903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,0.4891093174616496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,0.5172160069147745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,0.5149919986724854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,0.5077279806137085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,0.52292267481486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,0.5235413312911987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,0.5133119821548462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,0.5421706835428873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,0.5394186576207479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,0.30901867151260376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,0.30249067147572833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,0.5504479805628458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.26371200879414874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,0.30595733722050983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.2626933256785075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.2500693400700887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.2645919919013977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.2679520050684611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.2558079957962036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.2696266571680705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.26871466636657715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.26101332902908325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,0.27797333399454754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,0.27934932708740234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.2648533384005229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,0.16332266728083292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.16035733620325723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.1606613298257192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.13798399766286215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.13851733009020487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.12945066889127096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.14005866646766663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.1390506625175476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.14102933804194132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.13492799798647562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.1431839962800344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.13662933309872946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.14762666821479797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.1453546682993571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.14062933127085367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.08813333511352539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.08759466807047527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.08921600381533305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.07415999968846639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.07448000212510426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.07524799803892772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.0690880020459493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.07006933291753133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.07561066746711731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.07863466441631317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.07834666470686595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.07660266757011414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.05028266708056132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.04660266637802124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.053674668073654175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.0516533354918162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.04822933177153269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.046821330984433494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.0441599984963735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.045194665590922035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.03175999969244003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.029690665503342945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.03014933317899704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.3449173370997111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.3426133394241333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.32418133815129596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.34785600503285724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.3485333522160848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.33488531907399494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.3490080038706462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.3490026791890462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.3364479939142863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,0.3622026840845744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,0.2037973403930664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,0.3588213523228963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.3375626802444458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.20090667406717935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.19710934162139893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.18012267351150513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.18006932735443115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.16860800981521606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.1814346710840861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.18105065822601318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.18067733446756998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.17349867026011148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.18306666612625122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.10801600416501363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.17371733983357748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.18813333908716837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.18683733542760214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.17669866482416788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.10672000050544739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.10719999670982361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.09511466821034749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.09687466422716777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.08647466699282329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.09661333759625752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.09683733185132344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.09883733590443929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.08832533160845439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.09668800234794617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.09642666578292847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.09047466516494751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.09900266925493877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.094458669424057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.0558240016301473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.05884799857934316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.05885866781075796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.0568800022204717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.05604266623655955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.05592533449331919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.057333335280418396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.056186666091283165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.052005335688591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.0373279998699824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.03629333277543386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.034346667428811394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.03675200045108795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.03524799893299738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.036992001036802925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.03694933404525121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.024080000817775726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.26159467299779254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.26071999470392865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.26286933819452923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.2446826696395874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.26261333624521893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.2502826650937398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.2630506753921509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.2626826763153076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.250383992989858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.2664693395296733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.2667466600735982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.14987732966740927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.25220799446105957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.14829867084821066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.14519466956456503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.13661332925160727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.13645333051681519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.1260373294353485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.13843733072280884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.13635200262069702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.1381013294061025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.12612799803415933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.13699199755986533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.12800000111262003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.14110933740933737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.1397546629110972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.13169067104657492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.08194666604201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.08063466846942902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.07706133524576823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.07704533139864604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.0766133318344752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.07026133437951405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.07735466460386912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.07763200004895528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.07047999898592631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.07695466776688893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.0711946686108907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.047194664676984154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.043749332427978516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.044879997769991554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.043434664607048035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.04493333399295807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.0425600012143453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.04515199859937032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.030495998760064442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.045834665497144066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.04659733176231384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.030981334547201794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.030762667457262676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.021967999637126923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.21591466665267944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.21566933393478394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.20265066623687744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.215338667233785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.21660800774892172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.20388267437616983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.2172586719195048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.21502399444580078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.20564266045888266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.2190613349278768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.21899199485778809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.20899200439453125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.11926399668057759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.11896533767382304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.11857600013415019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.11593600114186604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.11562666296958923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.10917866230010986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.10964266459147136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.11546666423479716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.1172106663386027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.11574932932853699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.11534933249155681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.1093386709690094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.11691199739774068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.11641066273053487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.06739733119805653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.0664213349421819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.06614399949709575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.06611200173695882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.06465599934260051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.06162666777769724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.06439466774463654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.06433066725730896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.0625493327776591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.041093334555625916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.038949333131313324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.03912533322970072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.040421334405740104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.03896533449490865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.026394667724768322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.02037866661945979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.020501332978407543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.02037866661945979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,9.056639989217123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,9.164997100830078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,11.77679443359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,11.614356994628906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,11.602186838785807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,11.781915028889975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,11.702560424804688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,6.043999989827474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,9.404415766398111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,12.210650126139322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,9.25320561726888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,6.080453236897786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,11.971914927164713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,12.160906473795572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,4.909008026123047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,5.970981597900391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,4.528965314229329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,6.071498870849609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,5.923418680826823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,4.534895896911621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,6.029834747314453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,6.030975977579753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,4.632714589436849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,6.051034927368164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,3.041301409403483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,4.649402618408203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,5.9877974192301435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,3.0077012379964194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,6.311381022135417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,2.4626612663269043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,2.8656746546427407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,2.931920051574707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,2.3831574122111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,2.4332799911499023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,2.8897120157877603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,3.1896371841430664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,2.9803946812947593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,2.9640960693359375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,2.4113759994506836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,2.977562586466471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,1.566549301147461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,1.6137386957804363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,2.4755733807881675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,3.048490524291992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,1.4668693542480469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,1.5151039759318035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,1.5792694091796875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,1.3505066235860188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,1.536677360534668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,1.5479146639506023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,1.4321759541829426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,1.5444374084472656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,1.5343573888142903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,1.370757261912028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,1.5274559656778972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,1.537109375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,1.39629332224528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,5.3257706960042315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,6.9158986409505205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,6.916255950927734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,5.3850663503011065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,6.981141408284505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,6.762437184651692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,6.838346481323242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,3.467130661010742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,5.384682973225911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,3.667759895324707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,6.963328043619792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,5.363813400268555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,7.122416178385417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,7.1104482014973955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,2.901701291402181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,3.3789599736531577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,2.740351994832357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,3.420373280843099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,3.460895856221517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,2.854341189066569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,3.57476806640625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,2.856997489929199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,3.474383989969889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,3.592325210571289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,3.5284481048583984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,2.8801279067993164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,3.5012906392415366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,1.8046399752298992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,1.8056853612263997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,1.6385653813680012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,1.9001760482788086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,1.748794714609782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,1.5188533465067546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,1.6975253423055012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,1.492143948872884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,1.7466452916463215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,1.7124266624450684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,1.7174293200174968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,1.5186346371968586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,1.715727965037028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,1.727237383524577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,1.0013973712921143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,1.0084693431854248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,1.4849759737650554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,0.9603253205617269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,0.9455200036366781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,1.0119413534800212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,0.8998613357543945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,0.9525439739227295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,0.831109364827474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,0.9490880171457926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,0.9459413687388102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,0.9563360214233398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,0.8761599858601888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,0.9577759901682535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,0.840837319691976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,0.958784023920695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,3.8722667694091797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,4.961925188700358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,4.973535855611165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,3.827808062235514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,4.894277254740397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,4.9686934153238935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,5.036239941914876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,2.630613327026367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,3.8631518681844077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,2.533578713734945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,3.996639887491862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,4.97164789835612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,4.963178634643555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,4.986757278442383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,2.23417599995931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,2.4647253354390464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,1.9912800788879395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,2.5160959561665854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,2.4127413431803384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,2.4077919324239097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,2.064821402231852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,1.28547199567159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,2.3748745918273926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,2.1204479535420737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,2.081077257792155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,2.366175969441732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,2.3335092862447104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,2.4703946113586426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,1.2745227018992107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,1.2984639803568523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,1.1411786874135335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,1.2568693161010742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,1.2611680030822754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,1.250762701034546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,1.2447946866353352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,1.1025066375732422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,1.2521546681722004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,1.0949013233184814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,1.2723146279652913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,0.7212639649709066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,1.2769920031229656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,0.7330133120218912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,1.0947466691335042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,0.668773333231608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,0.6917546590169271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,0.6239733298619589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,0.7371040185292562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,0.7140373388926188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,0.6979573567708334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,0.6260693470637003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,0.7023786703745524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,0.6994773546854655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,0.6283520062764486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,0.7173386414845785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,0.715994675954183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,0.6335359811782837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,5.15778128306071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,1.2467466990152996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,5.227029482523601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,6.616720199584961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,6.643290837605794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,6.634656270345052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,6.531056086222331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,6.738997141520183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,3.443920135498047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,3.470111846923828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,5.299231847127278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,6.6270402272542315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,5.27835210164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,6.561418533325195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,6.899808247884114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,2.934368133544922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,3.147029240926107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,3.1966772079467773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,2.8289972941080728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,3.133813222249349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,2.7582613627115884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,3.1712586085001626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,3.1489171981811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,3.2793652216593423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,2.739253362019857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,1.7162879308064778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,3.5792105992635093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,1.7293492952982585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,1.5953760147094727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,3.284645398457845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,2.7604586283365884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,1.6769973436991374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,1.5929546356201172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,1.5078506469726562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,1.7215894063313801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,1.3883253733317058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,1.6115892728169758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,1.673850695292155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,1.6090453465779622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,1.530176003774007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,1.6383147239685059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,0.8939200242360433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,1.6453973452250164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,0.9132373332977295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,1.4520266850789387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,0.9159253438313802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,0.8692959944407145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,0.875829299290975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,0.7800800005594889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,0.8552107016245524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,0.9340213139851888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,0.8732053438822428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,0.7642292976379395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,0.8753653367360433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,0.8814826806386312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,0.8770293394724528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,0.7682293256123861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,0.7599413394927979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,0.5181920131047567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,0.5191146532694498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,0.46613868077596027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,0.49035199483235675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,0.49017067750295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,0.46462400754292804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,0.4867146809895833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,0.4472373326619466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,0.487552007039388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,0.49675198396046955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,0.49225600560506183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,0.4485119978586833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,0.5009066661198934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,0.5028426647186279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,0.4525013367335002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,3.1702667872111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,3.9643465677897134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,3.8487625122070312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,3.1623093287150064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,4.023290634155273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,4.0289920171101885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,4.000746726989746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,3.964949289957682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,2.012890656789144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,3.1960372924804688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,2.0846773783365884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,1.8224372863769531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,1.9166080156962078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,3.234970728556315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,3.858949343363444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,4.104101181030273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,1.6167680422465007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,1.8995946248372395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,1.943002700805664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,1.6771413485209148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,1.9292052586873372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,1.9406240781148274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,1.9587465922037761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,1.673733393351237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,1.0456373691558838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,1.972042719523112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,1.1649920145670574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,0.9796106815338135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,1.9600106875101726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,1.7376906077067058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,1.0079519748687744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,0.989957332611084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,0.9196960131327311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,1.016650676727295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,0.9969013532002767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,0.8908267021179199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,0.9952906767527262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,0.9999840259552002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,0.8690773646036783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,1.006874640782674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,0.5753120183944702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,1.0246826807657878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,0.8828159968058268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,0.5833599964777628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,0.5325440168380737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,0.5401333173116049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,0.48444799582163495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,0.5451253255208334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,0.543071985244751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,0.49456532796223956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,0.547599991162618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,0.5468959808349609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,0.49038398265838623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,0.5560586849848429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,0.5549653371175131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,0.4960533380508423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,0.3390666643778483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,0.3415199915568034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,0.5370186567306519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,0.3121760090192159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,0.3181546727816264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,0.31850665807724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.29075199365615845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,0.31786133845647174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,0.3195733428001404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.2916319966316223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,0.31806399424870807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,0.3206506570180257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.2966559926668803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,0.3258026639620463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,0.3266826669375102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.301530659198761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,3.246090571085612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,3.955733299255371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,3.958362579345703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,3.2801599502563477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,3.9607359568277993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,4.001445452372233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,4.039039929707845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,4.109994570414226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,2.1261439323425293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,2.1635306676228843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,3.2939840952555337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,1.94160000483195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,1.943493366241455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,3.348165194193522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,4.032554626464844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,4.061317443847656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,1.9381225903828938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,1.727765401204427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,1.889802614847819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,1.7208587328592937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,1.9123093287150066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,1.933301289876302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,1.9296213785807292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,1.8654026985168457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,1.060378630956014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,1.1523253122965496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,0.992474635442098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,2.005338668823242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,1.7446826299031575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,2.0127786000569663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,0.9955626328786215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,0.9856266975402832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,0.8618133068084717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,0.994762659072876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,0.991002639134725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,0.8775146802266439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,0.9836853345235189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,1.021781365076701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,0.8899146715799967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,1.007813294728597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,0.564847985903422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,1.0120586554209392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,0.57915198802948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,0.8880799611409506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,0.5085759957631429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,0.5349440177281698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,0.5234400033950806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,0.4810346762339274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,0.525599996248881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,0.5285973151524862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,0.5259626706441244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,0.4713759819666545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,0.5349013408025106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,0.5289760033289591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,0.4736693302790324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,0.5387093226114908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,0.4806613524754842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,0.3185173273086548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,0.3230453332265218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,0.292794664700826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,0.29337066411972046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,0.29475200176239014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.26945600907007855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,0.2932906746864319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,0.29974399010340375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,0.2948426604270935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.2722346584002177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,0.297050674756368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.2751413385073344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,0.3047306736310323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,0.30556267499923706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.2776160041491191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.19721599419911703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.19922133286794028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.18285866578420004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.18595200777053833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.18498667081197104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.18557333946228027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.17089066902796426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.18516266345977783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.17082132895787558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.18480533361434937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.1872053345044454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.17010666926701865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.1867520014444987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.1864373286565145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.17247466246287027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,2.3957227071126304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,2.0883572896321616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,2.456831932067871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,2.4252266883850098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,2.0985973676045737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,2.501333395640055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,2.4816853205362954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,1.349237283070882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,2.126709302266439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,1.3806719779968262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,2.5181919733683267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,1.2337226867675781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,2.462015946706136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,2.2493972778320312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,2.5686826705932617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,1.2185173034667969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,1.2159093221028645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,1.0969866911570232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,1.2561439673105876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,1.2224106788635254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,1.172218640645345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,1.2216533025105794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,1.267093340555827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,1.0812479654947917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,0.6905386447906494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,1.2487733364105225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,1.2742133140563965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,0.7215627034505209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,1.1152160167694092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,0.6257280111312866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,0.640559991200765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,0.6343733469645182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,0.5632053216298422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,0.6360160112380981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,0.672432025273641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,0.5673653284708658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,0.6426773468653361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,0.6419359842936198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,0.6559520165125529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,0.5741493304570516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,0.6502933502197266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,0.5821546713511149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,0.37989866733551025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,0.3816853364308675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,0.34487998485565186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,0.34055999914805096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,0.3425866762797038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,0.3118720054626465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,0.34351468086242676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,0.34730132420857746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,0.3132693370183309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,0.34885334968566895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,0.3518506685892741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,0.3158346613248189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,0.3543999989827474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,0.3587786753972371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,0.3208799958229065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,0.21919999519983926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,0.22055466969807944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.2036693294843038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.19750400384267172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.1974239945411682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.17867734034856161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.19836799303690592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.19779733816782633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.1829973260561625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.20009599129358926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.19966934124628702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.1400373379389445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.1864479978879293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.20486400524775186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.20620266596476236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.190175990263621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.12262933452924092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.13869333267211914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.13030399878819784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.1323253313700358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.1322986682256063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.13291733463605246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.13226667046546936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.12229333321253459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.13249066472053528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.1339466671148936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.12350400288899739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.13366933663686117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.13528533776601157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.1230560044447581
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,2.667344093322754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,2.2944639523824057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,2.6669438680013022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,2.6281654040018716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,2.3148320515950522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,2.664794603983561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,2.736821174621582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,2.7221174240112305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,1.46725861231486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,1.4912427266438801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,2.367146650950114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,1.3607199986775715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,2.744490623474121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,2.413109302520752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,2.736469268798828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,1.3185706933339436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,1.3229013284047444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,1.1848053137461345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,1.3136320114135742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,1.3408479690551758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,1.193285306294759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,1.3320480187733967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,1.3490293820699055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,1.1832106908162434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,1.355141321818034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,0.7607786655426025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,0.772816022237142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,1.3697919845581055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,1.206170638402303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,0.6818133195241293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,0.6800266901652018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,0.6786560217539469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,0.623802661895752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,0.6854506333669027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,0.6880319913228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,0.606655995051066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,0.6847626368204752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,0.6893119812011719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,0.6106826861699423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,0.6996106306711832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,0.7077759901682535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,0.3996586799621582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,0.6234613259633383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,0.40832531452178955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,0.36604801813761395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,0.35952532291412354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,0.36057599385579425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,0.32417066891988117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,0.36341333389282227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,0.3631733258565267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,0.325872004032135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,0.3666880130767822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,0.3683679898579915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,0.32928532361984253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,0.3710933526357015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,0.3762293259302775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,0.22347732384999594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,0.33630398909250897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,0.2278719941775004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.20179200172424316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.20589866240819296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.19777599970499674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.19884266455968222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.18198400735855103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.20189867417017618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.18369066715240479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.20292800664901733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.13226667046546936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.18955200910568237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.20531733830769858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.1871359944343567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.20774400234222412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.21121066808700562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.13591999808947244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.12131200234095256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.12542399764060974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.11982400218645732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.12091733018557231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.10929600397745769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.11988266309102376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.10981333255767822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.12326400478680928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.12265066305796306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.10972799857457478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.12332800030708313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.12364266316095988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.11448533336321513
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.08682133754094441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.08892800410588582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.08262399832407634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.08411199847857158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.0862613320350647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.07916266719500224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.08547199765841167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.08499733606974284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.08590933680534363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.07869333525498708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.0860053300857544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.08694932858149211
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.08007466793060303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,1.7725706100463867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,1.5593759218851726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,1.7766027450561523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,1.7892212867736816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,1.5698560078938801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,1.7834986050923665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,1.8151413599650066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,1.6028639475504558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,1.0040586789449055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,1.8260265986124675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,1.0166347026824951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,1.829477310180664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,1.8517920176188152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,0.9080906709035238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,1.6263893445332844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,0.8889599641164144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,0.889077345530192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,0.7882826328277588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,0.8959519863128662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,0.8983893394470215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,0.7936693032582601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,0.9010293483734131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,0.9126400152842203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,0.8036426703135172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,0.9205333391825358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,0.8266346454620361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,0.932101329167684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,0.5191306670506796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,0.5291680097579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,0.47249066829681396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,0.4633226792017619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,0.4680480162302653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,0.46481064955393475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,0.4103786547978719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,0.4666666587193807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,0.4143466552098592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,0.47095998128255206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,0.4724533160527547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,0.4208799997965495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,0.28361066182454425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,0.48131199677785236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,0.4853866497675578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,0.2785653273264567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,0.4305493434270223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,0.256661335627238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.2482879956563314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.24943999449412027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.224506676197052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.25172267357508343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.25202133258183795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,0.25996265808741253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.22774932781855264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,0.25358400742212933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,0.2547893325487773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.2300106684366862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,0.2627093394597371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.23625065883000693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.13834133744239807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.15902400016784668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.13834666212399802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.13926933209101358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.16172800461451212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.13741333285967508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.12381333112716675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.12665599584579468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.14165332913398743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.14180266857147217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.13147733608881632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.14491732915242514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.1485973298549652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.0886293351650238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.08052800099054973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.13617600003878275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.09373866518338521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.09669333696365356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.088837335507075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.09111467003822327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.08080533146858215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.0897653301556905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.08884800473848979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.08991466959317525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.08044800162315369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.08892800410588582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.061941335598627724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.05820266902446747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.0895146628220876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.09037333726882935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.08175999919573466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.05923733115196228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.055888002117474876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.05913599828879038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.06002666552861532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.05890133480230967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,1.879317283630371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,1.876522699991862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,1.748751958211263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,1.9137226740519206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,1.9078826904296875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,1.8941760063171387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,1.9368747075398762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,1.9558347066243489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,1.913248062133789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,1.100495974222819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,1.0729599793752034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,2.0391413370768228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,1.044101317723592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,0.951749324798584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,2.0308480262756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,1.9378827412923176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,0.9526346524556478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,0.8871253331502279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,0.9604427019755045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,0.9642399946848551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,0.912874698638916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,0.9747093518575033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,0.9749653339385986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,0.9231999715169271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,1.0054453214009602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,0.5615679820378622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,0.9935733477274576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,0.5482293367385864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,0.9717600345611572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,0.5339200099309286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,0.45423467953999835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,0.4862080017725627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,0.4604533513387044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,0.48287467161814374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,0.49021867911020917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,0.49978665510813397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,0.49270399411519367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,0.4976213375727336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,0.4681653181711833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,0.5116053422292074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,0.5079466501871744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,0.2940319975217183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,0.48289068539937335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,0.28968532880147296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,0.27825599908828735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,0.2547520001729329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,0.25540266434351605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.23319466908772787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,0.261135995388031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,0.25942399104436237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,0.2595733404159546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.2388746738433838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,0.2632319927215576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.1586720049381256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.24314665794372559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,0.26781866947809857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.1381439963976542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,0.26703999439875287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.2516426642735799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.14054933190345764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.1621386706829071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.1511306663354238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.13969600200653076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.1246560017267863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.1406773328781128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.13010133306185404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.14193066954612732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.14265599846839905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.13338667154312134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.09123200178146362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.1459946632385254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.14678399761517844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.13822399576505026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.09327999750773112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.092357337474823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.073594664533933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.08223466575145721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.08259200056393941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.08106666803359985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.07260266443093617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.0813920001188914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.082096000512441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.07620266576608022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.08168533444404602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.07234133283297221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.08396266897519429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.05415999889373779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.08196266492207845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.05811200042565664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.05565866827964783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.049456000328063965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.0540533314148585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.03886933376391729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.049285332361857094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.055770665407180786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.03843733419974645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.039274667700131737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.03904533386230469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,1.609007994333903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,1.5048160552978516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,1.6086452802022297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,1.6543359756469727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,1.6643733978271484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,1.6471412976582844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,1.6588160196940105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,1.6944106419881184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,0.9490880171457926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,1.6598505973815918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,1.76363738377889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,1.7493173281351726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,0.9305013020833334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,1.7017280260721843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,0.917365312576294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,0.8119466304779053
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,0.8114453156789144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,0.7627360026041666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,0.8289120197296143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,0.8286346594492594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,0.8019466400146484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,0.8373386859893799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,0.8362133502960205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,0.7912800312042236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,0.8607573509216309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,0.48847464720408124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,0.8530080318450928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,0.4687573512395223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,0.8405386606852213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,0.4151253302892049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,0.47656532128651935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,0.41519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,0.3894079923629761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,0.42258667945861816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,0.4229280153910319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,0.3987733523050944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,0.42873068650563556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,0.42793067296346027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,0.2579200069109599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,0.4055200020472209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,0.44201068083445233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,0.437824010848999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,0.4157973527908325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,0.2515893379847209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,0.24440000454584757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.22207466761271158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.2194826602935791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.21817066272099814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.1997226675351461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.22252267599105835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.2072746753692627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.22587732474009195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.21877866983413696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.22540799776713052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.2108693321545919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.11865066488583882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.23055466016133627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.22893333435058594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.14074132839838663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.13800000150998434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.13361600041389465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.11985066533088684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.12272533774375916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.11457600196202596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.10873599847157796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.1202826698621114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.1250933309396108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.11993599931399028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.11103467146555583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.12308800220489502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.12716266512870789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.11966933806737264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.08065600196520488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.08056533336639404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.06829333305358887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.06881066660086314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.06965866684913635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.06169599791367849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.06836799780527751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.07000533243020375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.07014933228492737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.06181866427262624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.04757866760094961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.0703413337469101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.07051733136177063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.06620799998442332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.045461331804593406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.04685866832733154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.04260266820589701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.033029332756996155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.0276053324341774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.03193599979082743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,0.7332159678141276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,0.7305280367533366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,0.6901493072509766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,0.7468319733937582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,0.7458506425221761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,0.7292586962381998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,0.7590133349100748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,0.7533546288808187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,0.7171146869659424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,0.7855467001597086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,0.4458453257878621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,0.4349173307418823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,0.7804906368255615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,0.7714080015818278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,0.4355520009994507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,0.37592534224192303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,0.3524159987767537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,0.3749759991963704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,0.38234134515126544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,0.3820106585820516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,0.35864531993865967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,0.38862399260203045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,0.3975840012232463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,0.38650135199228924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,0.36562132835388184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,0.4013226826985677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,0.23496532440185547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,0.3816266854604085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,0.22952532768249512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,0.22760534286499023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.19889599084854126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.19932266076405844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.20347734292348227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.18189332882563272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.20148267348607382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.20070399840672812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.19010132551193237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.2039946715037028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.12841066718101501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.19196265935897827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.2104746699333191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.2100586692492167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.199455996354421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.12660800417264303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.12384532888730367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.10867200295130412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.10309867064158122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.10838400324185689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.09852799773216248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.11145599683125813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.10966400305430095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.11153067151705424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.11308800180753072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.1053546667098999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.11587733030319214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.11589866876602173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.1104213297367096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.07182399928569794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.0705866664648056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.05584533512592316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.07423466444015503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.06214400132497152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.062314664324124656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.06202666461467743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.06196799874305725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.06288533409436543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.04423999786376953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.06340266764163971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.05676266551017761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.06374933322270711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.06344000001748402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.06048533320426941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.04218666752179464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.042992000778516136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.042853335539499916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.042122667034467064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.038480001191298165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.026736001173655193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.022261333962281544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.02288000037272771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,0.40489065647125244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,0.40669333934783936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.020015999674797058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,0.4148213466008504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.38818665345509845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.39190399646759033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,0.41329065958658856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,0.4210346539815267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,0.3975413242975871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,0.41864001750946045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,0.4315893252690633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,0.42989333470662433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,0.24764800071716309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,0.415669322013855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.21104000012079874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,0.24210667610168457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,0.24224533637364706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.2121493419011434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.1950719952583313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.21412267287572226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.21374932924906412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.20188266038894653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.21723200877507529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.21757866938908896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.20573333899180093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.22405866781870523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.2232853372891744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.2148533264795939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.13251733779907227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.13051199913024902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.13029866417249045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.11460799972216289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.11506666739781697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.10521599650382996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.11680000027020772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.11612799763679504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.10889066259066264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.1172213355700175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.11731200416882832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.11103467146555583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.07634133100509644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.12154133121172588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.11958400408426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.07682133217652638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.11725866794586182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.06633066634337108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.07614399989446004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.06717333197593689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.06704000135262807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.059546664357185364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.06836266815662384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.06637866795063019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.0673280010620753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.0691786656777064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.036687999963760376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.04120533416668574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.04026666780312856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.04073066761096319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.03833066672086716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.0296426663796107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.030106666187445324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.02994133283694585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.030970667799313862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.019834666202465694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.2697226603825887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.2690986593564351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.24911999702453613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.27482134103775024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.27215466896692914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.2710826595624288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.2555946707725525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.2749813397725423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.2598346670468648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.2817280093828837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.27961599826812744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.267685333887736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.15917332967122397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.15710399548212686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.156058669090271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.14404799540837607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.14317333698272705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.12983999649683634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.1444533367951711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.14296533664067587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.13384532928466797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.14518933494885763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.1356106698513031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.14824000000953674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.14827199776967367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.08865066369374593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.08846933643023173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.0881066620349884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.0804746647675832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.07868266602357228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.07222933570543925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.0718560020128886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.0804799993832906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.07227733234564464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.08006933331489563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.08036266764005025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.08107199768225352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.08026133477687836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.07588799794514973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.04345599810282389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.04972266654173533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.04821333289146423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.04846400022506714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.032885332902272545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.032069332897663116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.032698666055997215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.03029866764942805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.024720000723997753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.20120000839233398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.20227199792861938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.1875306765238444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.20259199539820352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.2020960052808126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.19100266695022583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.20428800582885742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.203274667263031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.19195199012756348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.20738667249679565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.2053013245264689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.19882667064666748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.10759466886520386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.11642666657765706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.1172106663386027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.11581866939862569
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.10831466317176819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.09892266988754272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.09910399715105693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.10733866691589355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.10779733459154765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.09857066472371419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.10929600397745769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.10318400462468465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.06369600196679433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.06570666531721751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.062650665640831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.062458669145902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.062234664956728615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.057146668434143066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.06431999802589417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.0589279979467392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.06393066545327504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.05719999969005585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.06379733482996623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.06339199841022491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.041050667564074196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.040181333820025124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.0412266676624616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.04083733260631561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.026757332185904186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.16607466340065002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.1667733391125997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.15584533413251242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.16642133394877115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.16737600167592367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.15609600146611533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.1678346594174703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.16734933853149414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.15787200133005777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.16940265893936157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.16765866676966348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.09283733367919922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.09267733494440715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.16180266936620077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.09034132957458496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.09100266297658284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.09160533547401428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.08672533432642619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.09090666969617207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.09169066945711772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.08583999673525493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.09199999769528706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.09061333537101746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.05589866638183594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.09194133679072063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.09191999832789104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.086517333984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.05522133409976959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.05540800094604492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.050928001602490745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.05507733424504598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.05179200073083242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.05526400109132131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.035002666215101876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.034389334420363106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.056015998125076294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.03492266684770584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.034314667185147606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.03461866577466329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.03395200024048487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.0349386657277743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.024634666740894318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.02046400060256322
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,5.970928192138672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,6.01310920715332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,7.600853602091472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,7.716133117675781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,7.759674708048503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,7.917119979858398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,7.865776062011719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,7.799477259318034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,3.996005376180013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,6.081626892089844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,4.13427734375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,3.272533416748047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,6.138266881306966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,7.903226852416992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,7.884122848510742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,3.9427413940429688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,3.0384480158487954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,3.7081333796183267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,3.8881279627482095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,3.979184150695801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,3.0628639856974282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,3.9743146896362305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,3.147024154663086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,3.989210764567057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,1.9920105934143066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,4.0724639892578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,2.010309378306071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,3.8493919372558594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,3.1158294677734375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,2.0962026913960776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,1.931008021036784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,1.693552017211914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,1.964138666788737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,1.9433172543843586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,1.9603892962137859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,1.8933653831481934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,1.9459999402364094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,1.9061813354492188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,1.6880426406860352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,1.1352159976959229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,1.922368049621582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,1.0812853177388508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,1.0092159907023113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,1.9919946988423665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,1.7339466412862141
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,1.0666399796803792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,1.0468746821085613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,0.9620373249053955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,1.057909329732259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,1.050096035003662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,0.9501012961069742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,1.1781919797261555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,1.0534826914469402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,0.9313706556955973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,1.058736006418864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,0.9429653485616049
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,1.0782026449839275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,3.5477011998494468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,4.626554807027181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,4.560442606608073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,3.5352748235066733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,4.664197285970052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,4.6160586675008135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,4.537594795227051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,4.559103965759277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,2.2779626846313477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,3.574629465738932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,2.270458698272705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,2.2667999267578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,2.277242660522461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,3.6710611979166665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,4.508960088094075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,4.669482549031575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,1.8339893023173015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,2.2585813204447427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,2.208720048268636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,2.1741493542989097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,1.8380746841430664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,2.291621367136637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,2.2536746660868325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,1.848698616027832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,1.193658669789632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,2.288266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,1.9353493054707844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,2.282479921976725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,1.294373353322347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,1.2407519817352295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,1.1445653438568115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,1.1665013631184895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,1.077402671178182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,1.1517813205718994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,1.0334880352020264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,1.1785120169321697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,1.1790186564127605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,1.156874656677246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,1.1045546531677246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,1.1794666449228923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,0.6824266910552979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,1.1872159639994304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,0.706549326578776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,1.020565350850423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,0.6564799944559733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,0.6556906700134277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,0.6509333451588949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,0.5865919987360636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,0.6039946476618449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,0.6524266799290975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,0.652730663617452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,0.6601706743240356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,0.6530186732610067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,0.5871839920679728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,0.6605600118637085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,0.6605226596196493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,0.5979733467102051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,2.5599573453267417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,3.252101262410482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,3.2968320846557617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,2.5681066513061523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,3.2051893870035806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,3.244762738545736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,3.229541460673014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,3.244330724080404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,1.8151466051737468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,2.587909380594889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,1.7362240155537922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,1.5276373227437336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,1.741967995961507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,2.6319519678751626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,3.2024319966634116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,3.2885119120279946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,1.3473226229349773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,1.7061813672383626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,1.586143970489502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,1.5683147112528484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,1.4790933926900227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,1.5898613929748535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,1.6344799995422363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,1.605690638224284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,0.9198880195617676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,0.8966186841328939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,0.8393440246582031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,1.6224907239278157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,1.6873547236124675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,1.3895680109659831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,0.9639840126037598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,0.8470453421274821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,0.7799893220265707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,0.8629013697306315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,0.8808746337890625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,0.8058773676554362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,0.9897759755452474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,0.8545920054117838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,0.7889493306477865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,0.8612799644470215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,0.5137279828389486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,0.8626026312510172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,0.777125358581543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,0.5141173203786215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,0.5023839871088663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,0.4387199878692627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,0.47968534628550213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,0.4428746700286865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,0.4881440003712972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,0.48660798867543537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,0.48144535223642987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,0.48308801651000977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,0.503877321879069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,0.4468746582667033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,0.4941973288853963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,0.500767985979716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,0.45346665382385254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,3.4309333165486655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,4.422879854838054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,4.400837262471517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,3.451610565185547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,4.437664031982422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,4.271674791971843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,4.231856028238933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,2.1899733543395996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,3.5034348169962564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,2.415541330973307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,4.320581436157227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,3.5426559448242188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,4.462832132975261
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,4.459770520528157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,1.902016003926595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,2.1290720303853354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,2.0880533854166665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,1.8774666786193848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,2.0664800008138022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,1.9050347010294597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,2.067263921101888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,2.098090648651123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,2.15995200475057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,1.9009067217508953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,1.1205813090006511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,1.1492746671040852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,2.0999147097269693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,1.0493600368499756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,2.172191937764486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,1.8427252769470215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,1.0929173628489177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,0.932037353515625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,1.0862239996592205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,1.0774613221486409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,1.106922705968221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,0.9416159788767496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,1.1135733127593994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,1.0765120188395183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,0.9837280114491781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,1.0930293401082356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,0.6229226589202881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,1.1140639781951904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,0.6178719997406006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,1.0226186911265056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,0.5770560105641683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,0.5777706702550253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,0.5847946802775065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,0.5237653255462646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,0.5847626527150472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,0.5930240154266357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,0.5236959854761759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,0.5904800097147623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,0.602949341138204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,0.5289173523585001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,0.5994879802068075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,0.6022186676661173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,0.36260799566904706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,0.5378133455912272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,0.36392533779144287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,0.33238399028778076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,0.3452053467432658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,0.342303991317749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,0.3143893281618754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,0.3420480092366536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,0.34667734305063885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,0.322437326113383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,0.3516639868418376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,0.3532693386077881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,0.34378667672475177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,0.3155626654624939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,0.3445653518040975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,0.326800008614858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,2.102757294972738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,2.5037333170572915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,2.601802666982015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,2.1174453099568686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,2.5001920064290366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,2.5966453552246094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,2.6801973978678384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,2.521706740061442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,1.3576052983601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,2.1338346799214682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,1.4290612538655598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,1.3879893620808919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,2.189199924468994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,1.3565866152445476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,2.566234588623047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,2.5968000094095864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,1.2703200181325276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,1.0896480083465576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,1.2802399794260662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,1.2725813388824463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,1.097434679667155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,1.2726506392161052
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,1.1863146622975667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,1.4092373847961426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,0.7283199628194174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,1.2998133500417073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,0.7094133694966634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,1.293445348739624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,0.6494559844334921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,1.144261360168457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,0.6789759794871012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,0.67795729637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,0.6094186703364054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,0.6829813321431478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,0.6755413214365641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,0.5962080160776774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,0.6787839730580648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,0.6839306354522705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,0.6011306842168173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,0.6861920356750488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,0.3933653434117635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,0.692741314570109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,0.6127466758092245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,0.4005546569824219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,0.3620106776555379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,0.36715201536814374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,0.33928000926971436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,0.3683360020319621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,0.3717333475748698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,0.37176533540089923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,0.3405226469039917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,0.3782186508178711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,0.3764959971110026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,0.3430986801783244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,0.3826719919840495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,0.38598934809366864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,0.3486826817194621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,0.23817066351572672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,0.23979200919469199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.223797341187795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,0.22662399212519327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,0.2258666753768921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.2114773392677307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,0.22567999362945557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,0.22815465927124023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.2108746568361918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,0.22780267397562662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,0.2283680041631063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.20972800254821777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,0.2318506638209025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,0.2322346568107605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.2180160085360209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,2.5802772839864097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,2.167114734649658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,2.577317396799723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,2.5105493863423667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,2.17959992090861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,2.5269920031229653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,2.5676159858703613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,2.628762722015381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,1.3596852620442708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,1.3858933448791504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,2.2510719299316406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,1.3691840171813965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,1.282650629679362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,2.676565488179525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,2.332335948944092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,2.6472959518432617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,1.2613386313120525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,1.1377867062886555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,1.2732160091400146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,1.1221280097961426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,1.2673813501993816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,1.288922627766927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,1.2807146708170574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,1.2697546482086182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,0.7015199661254883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,1.3213120301564534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,0.7224533557891846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,1.1908799807230632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,1.3190720081329346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,0.6730079650878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,0.670464038848877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,0.6619146664937338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,0.6727413336435953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,0.5804160038630167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,0.6626240015029907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,0.7089653015136719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,0.6687626838684082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,0.5901120106379191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,0.5974613428115845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,0.6789440313975016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,0.6895306905110677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,0.3833706776301066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,0.6077226797739664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,0.39760533968607586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,0.35286398728688556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,0.35225598017374676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,0.35317333539326984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,0.3220799962679545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,0.35649601618448895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,0.35654934247334796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,0.32382933298746747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,0.3619946638743083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,0.3632426659266154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,0.3288159966468811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,0.36748266220092773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,0.37003199259440106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,0.3367573420206706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,0.22233599424362183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,0.2246239980061849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.2076479991277059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.20521599054336548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.20452799399693808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.18649599949518839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.20426666736602783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.20442134141921997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.18902399142583212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.20732800165812174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.21521600087483725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.20009599129358926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.14074132839838663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.20856533447901407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.1957333286603292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.1383999983469645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.21502933899561563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.14415466785430908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.13638933499654135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.13801067074139914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.12845333417256674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.13979199528694153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.1387999951839447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.13838932911554971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.1416266659895579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.12991467118263245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.1378773351510366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.13052800297737122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.13794133067131042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.13025066256523132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,1.5708640416463215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,1.3879307111104329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,1.5676746368408203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,1.5990187327067058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,1.6037440299987793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,1.4072160720825195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,1.6040959358215332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,1.633589267730713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,1.4792373975118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,0.88483198483785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,1.6865919431050618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,0.8920746644337972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,1.664415995279948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,1.4869920412699382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,0.8342239856719971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,0.8088853359222412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,0.8057440121968588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,0.712826649347941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,0.8835519949595133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,0.8096213340759277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,0.72435196240743
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,0.8154346942901611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,0.8266826470692953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,0.7321973641713461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,0.8401386737823486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,0.8484746615091959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,0.4633280038833618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,0.4679466485977173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,0.7753439744313558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,0.42610665162404376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,0.42262399196624756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,0.42554668585459393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,0.3834826548894246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,0.42504533131917316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,0.42773866653442383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,0.3851413329442342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,0.4342613220214844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,0.4346346855163574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,0.3936320145924886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,0.4448426564534505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,0.44602131843566895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,0.4024266799290975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,0.2325119972229004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,0.256714661916097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,0.25919467210769653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,0.23837866385777792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.2177600065867106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,0.2328746716181437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.2158986727396647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,0.23326400915781656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,0.23293334245681763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,0.23935467004776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,0.23942933479944864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.22219200929005942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.14402666687965393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,0.2460213303565979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,0.24794665972391763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.228602667649587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.1527679959932963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.1537866691748301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.14148799578348795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.13980266451835632
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.1276746690273285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.14119999607404074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.14242133498191833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.1281760036945343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.1420960028966268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.1420906682809194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.13116266330083212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.14628266294797262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.14722667137781778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.13806933164596558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.09885332981745402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.10060800115267436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.10147733489672343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.09820266564687093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.09258133172988892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.09821866949399312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.09269866347312927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.09278399745623271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.09865599870681763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.09888533751169841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,1.7468159993489583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,1.5386719703674316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,1.7353173891703289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,1.7740586598714192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,1.557429313659668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,1.750208059946696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,1.773754596710205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,1.5799840291341145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,1.8016799290974934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,0.9710506598154703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,0.9821386337280273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,1.8593173027038574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,1.6519412994384766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,1.8785173098246257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,0.8826239903767904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,0.8802560170491537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,0.774890661239624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,0.8697546323140463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,0.8706986904144287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,0.883237361907959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,0.7838773727416992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,0.8946399688720703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,0.8906773726145426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,0.8024640083312988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,0.5008533398310343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,0.9143626689910889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,0.507909337679545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,0.44865067799886066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,0.9276213645935059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,0.8326666355133057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,0.4493066469828288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,0.46304531892140705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,0.45205867290496826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,0.4088746706644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,0.40832531452178955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,0.45500266551971436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,0.4763946533203125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,0.4584426482518514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,0.46105066935221356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,0.4156159957249959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,0.48178664843241376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,0.4342293341954549
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,0.26845866441726685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,0.2736053268114726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,0.25071465969085693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,0.23979200919469199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.2246346672375997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,0.24088533719380698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.22206934293111166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,0.24292266368865967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,0.24500799179077148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,0.24693334102630615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,0.25013333559036255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.22787733872731528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,0.25572266181310016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,0.2596693237622579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.23704532782236734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.1544426679611206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.1581013302008311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.14604266484578451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.1356053352355957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.1414400041103363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.13565333684285483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.12436800201733907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.13597333431243896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.13740266362826029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.1272053321202596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.1421173314253489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.13370133439699808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.14763733744621277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.14659733573595682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.13892799615859985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.0823413332303365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.09340266386667888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.09537600477536519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.09072533249855042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.08944533268610637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.08875200152397156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.0899786651134491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.08272000153859456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.08477333188056946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.09064533313115437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.08196266492207845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.09070932865142822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.09077333410580952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.0612960010766983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.06215466558933258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.06150400141874949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.060549333691596985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.059989333152770996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.060165335734685264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.061903998255729675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.06128533184528351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,1.1474346319834392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,1.1458720366160076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,1.0226666927337646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,1.155717372894287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,1.152901331583659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,1.0382400353749592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,1.168613354365031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,1.187274694442749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,1.0605759620666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,1.2209866841634114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,0.6602720022201538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,0.669157346089681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,1.1039893627166748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,1.2311999797821045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,0.6099040110905966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,0.5816479921340942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,0.5855413277943929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,0.5271893342336019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,0.5867520173390707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,0.5856693188349406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,0.6036106745402018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,0.5328480005264282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,0.5966613292694092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,0.5453439950942993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,0.3429653247197469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,0.6181653340657552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,0.6286133527755737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,0.5662560065587362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,0.35150933265686035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,0.3202613393465678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,0.30594666798909503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,0.3083466688791911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.2800426681836446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,0.31752000252405804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,0.3085493246714274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,0.30983465909957886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.2832266688346863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,0.31484800577163696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,0.28778133789698285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,0.32578132549921673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,0.3299199938774109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,0.2980213363965352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.15506666898727417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.18763200441996256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.19244800011316934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.17826666434605917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.16569599509239197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.16667733589808145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.16739734013875326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.16909333070119223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.15773866573969522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.17122666041056314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.1665546695391337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.17347200711568198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.1604746679464976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.17868266503016153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.09715200463930766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.182751993338267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.10991999506950378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.11204800009727478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.1050986647605896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.09690666198730469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.09158399701118469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.09857599933942159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.09905067086219788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.09115733702977498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.09880533814430237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.07025599976380666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.06555200119813283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.10043733318646748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.10127466917037964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.09703466296195984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.068122665087382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.06619200110435486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.06087466577688853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.06596800188223521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.06623466809590657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.06605866551399231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.06195199986298879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.06625600159168243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.06057066718737284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.0681279997030894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.06277333199977875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.04952000081539154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.049733335773150124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.04644800225893656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,1.2237226963043213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,1.2147733370463054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,1.1550239721934001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,1.229423999786377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,1.2172959645589192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,1.1770506699879963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,1.303328037261963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,1.285919984181722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,1.3943573633829753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,1.2947893142700195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,0.7098933060963949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,0.704416036605835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,1.2802240053812664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,1.3562666575113933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,0.6874933242797852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,0.6186399857203165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,0.6134026845296224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,0.5878506501515707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,0.6292906602223715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,0.6251999934514364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,0.6459786494572958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,0.5930346647898356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,0.6461546818415324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,0.6898986498514811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,0.6510399977366129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,0.658186674118042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,0.36605334281921387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,0.3207040031750997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,0.35886398951212567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,0.6480106512705485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,0.35630400975545246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,0.3216000000635783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.3038933277130127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,0.3270240028699239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,0.3239519993464152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.30804266532262164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,0.3370560010274251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,0.3348533312479655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,0.34348265329996747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,0.3417973518371582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,0.3390880028406779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,0.33370665709177655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,0.19546133279800415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,0.1939893364906311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.1912426749865214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.17401599884033203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.17192532618840536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.1622719963391622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.17615999778111777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.18059200048446655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.17683200041453043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.1644480029741923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.18278400103251138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.18174932400385538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.18595200777053833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.1828213334083557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.09480533003807068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.1104746659596761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.1785386602083842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.11101866761843364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.10601066549619038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.09530133008956909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.08893332878748576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.09713600079218547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.1029813289642334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.09611200292905171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.10095999638239543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.10041067004203796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.09681600332260132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.10165866216023763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.05861333509286245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.10045866171518962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.06187733511130015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.05388799806435903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.06039466460545858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.058517331878344216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.061194668213526406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.06080000102519989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.060602664947509766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.039146666725476585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.039061332742373146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.030192000170548756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.030037333567937214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.03197333216667175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.03309866786003113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,1.031978686650594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,1.0327893098195393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,1.005189339319865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,1.0491573015848796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,1.0389599800109863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,1.0004639625549316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,1.1097813447316487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,1.0896053314208984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,1.2267200152079265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,1.1059520244598389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,0.6120853424072266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,0.6076480150222778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,1.0954986413319905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,0.602570652961731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,1.1904799938201904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,0.5271306832631429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,0.5212800105412801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,0.5055306752522787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,0.5339413483937582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,0.5311839977900187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,0.5162613391876221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,0.5561813513437907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,0.549343983332316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,0.6086346705754598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,0.5604533354441324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,0.5546079874038696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,0.31590932607650757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,0.5657013257344564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,0.31115732590357464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,0.3131573398907979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,0.27400000890096027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,0.27332266171773273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.2612160046895345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,0.2793546716372172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,0.2765973409016927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.2653119961420695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,0.2892373402913411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,0.28728532791137695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.28805333375930786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,0.29264533519744873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,0.2884640097618103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,0.2845279971758525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,0.16939733425776163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.16796799500783285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.16563733418782553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.14798933267593384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.146314670642217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.1381066640218099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.15032000343004862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.14963733156522116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.14173332850138345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.1572480003039042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.15576533476511636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.09478933612505595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.15346666177113852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.15993600090344748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.15657066305478415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.15413866440455118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.09565866986910503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.08038933575153351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.09324799974759419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.07961600025494893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.08070399860541026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.07459199925263722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.07433066765467326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.08441600203514099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.08438400427500407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.08568533261617024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.052757332722345986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.08693333466847737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.08594133456548055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.08545066912968953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.05363733569780985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.05304533243179321
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.05166399975617727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.04929600159327189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.048357332746187844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.0305173322558403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.032261334359645844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.03263466556866964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.02595199892918269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.024842667082945507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.024458666642506916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,0.47658665974934894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,0.4779786666234334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,0.45852800210316974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,0.4852586587270101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,0.47869332631429035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,0.4657599925994873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,0.5058720111846924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,0.4989813168843587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,0.5019253492355347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,0.568832000096639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,0.5165493488311768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,0.2861706614494324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,0.5058826605478922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,0.2810720006624858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,0.2895093361536662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.24657599131266275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.2459519902865092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.23825599749883017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,0.2518826723098755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.2497119903564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,0.26639999945958454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.24035199483235678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,0.2610293428103129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,0.26051199436187744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.27355732520421344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,0.26131200790405273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,0.2640746633211772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,0.15436266859372458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.15213867028554282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.15435733397801718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.13125333189964294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.1304266651471456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.1243893305460612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.13381333152453104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.13401066263516745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.12787200013796488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.14076800147692362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.13963199655214945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.1434879998366038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.1434560020764669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.14039466778437296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.1411786675453186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.08898666501045227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.06809600194295247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.08785067001978557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.07238933444023132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.07786133388678233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.07447466750939687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.07235200206438701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.06821866830190022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.07852266728878021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.07665066421031952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.07752533257007599
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.07900266846021016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.04171733558177948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.043434664607048035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.04260266820589701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.04285866518815359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.04530133306980133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.04517866671085358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.04456000030040741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.024885334074497223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.02038399999340375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.27323200305302936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.26761066913604736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.25650666157404584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.2778880000114441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.2760853370030721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.2593119939168294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,0.28756799300511676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,0.2916799982388814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,0.29128533601760864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.29313600063323975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,0.28454933563868207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,0.16302399833997092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,0.2802880009015401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,0.16100266575813293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.16195733348528543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.14620799819628397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.14365866780281067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.13775466879208884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.14112533132235208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.13337600231170654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.14427199959754944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.15292800466219583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.15126933654149374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.1523253321647644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.1515679955482483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.15398933490117392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.07637866834799449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.14933866262435913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.09029866258303325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.08988266189893086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.0902453362941742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.07747733096281688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.0702400008837382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.07797866563002269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.07247999807198842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.08255999783674876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.08162133395671844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.0844053328037262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.08167999982833862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.04739200075467428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.08252266546090443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.0827946662902832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.04894400139649709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.04776533444722494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.0286613330245018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.18425599733988443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.18273067474365234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.17169066270192465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.19509865840276083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.1871839960416158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.18622400363286337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.1728586753209432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.19112533330917358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.18530666828155518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.18876266479492188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.19512534141540527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.19229867060979208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.11105066537857056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.11033599575360616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.09877866506576538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.1069653332233429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.09824533263842265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.09667733311653137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.08918399612108867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.09886933366457622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.09123733639717102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.10321600238482158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.10124267141024272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.09882666667302449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.10500267148017883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.10290132959683736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.099589337905248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.05917333563168844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.058789332707722984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.05776533484458923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.05583466589450836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.054671997825304665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.05046933392683665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.056736002365748085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.05418666700522105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.05607999861240387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.05799466868241628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.038373333712418876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.03809600075085958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.12803733348846436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.13874133427937826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.13779200116793314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.14065600434939066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.13877866665522257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.12780800461769104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.14353600144386292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.14291733503341675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.07828799883524577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.1377066671848297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.14383467038472494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.14362667004267374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.1379039982954661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.07936533292134602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.07829866806666057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.07633066674073537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.07563733557860057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.07635733485221863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.07799999912579854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.07355199754238129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.07799999912579854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.07825600107510884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.07420266668001811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.0422026664018631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.044351999958356224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.030218665798505146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.02978666623433431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.027823999524116516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.030405332644780476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.02176533391078313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.11547733346621196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.11522133151690166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.11045866211255391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.1167093316713969
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.11550933122634888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.11043733358383179
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.11796800295511882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.11880532900492351
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.11717333396275838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.1111306647459666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.11610666910807292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.11203199625015259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.06750933329264323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.06826133529345195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.06566933294137318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.06477866570154826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.06620799998442332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.06638399759928386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.06381333371003468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.06658666829268138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.06462933123111725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.06674666702747345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.06702933212121327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.06399466594060262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.03933866570393244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.026309333741664886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,4.61138121287028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,4.550586700439453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,5.881231943766276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,5.908063888549805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,5.899845123291016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,6.0979359944661455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,5.933205286661784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,5.893594741821289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,2.9160852432250977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,2.5046985944112143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,2.9501654307047525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,2.8027518590291343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,2.800800005594889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,4.596512158711751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,2.3429013888041177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,2.932938575744629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,2.329909324645996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,2.8819198608398438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,1.5339147249857585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,1.530128002166748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,2.895557403564453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,1.3829065958658855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,2.39573335647583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,2.856405258178711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,1.4608853658040364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,1.3274239699045818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,1.501157283782959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,1.4731094042460124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,1.2742239634195964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,1.5196266174316406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,1.4734506607055664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,0.8474613030751547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,1.673866589864095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,0.9013386567433676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,0.7657492955525717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,1.3715893427530925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,0.811626672744751
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,0.8392586708068848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,0.7995359897613525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,0.8253546555836996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,0.8320533434549967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,0.7393386363983154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,0.8280533154805502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,0.7395626703898112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,0.8298827012379965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,2.685866673787435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,3.349269231160482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,2.7049919764200845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,3.3853867848714194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,3.6450719833374023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,3.337082544962565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,3.3744799296061196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,3.4017333984375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,1.7615092595418294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,1.7589386304219563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,1.6027572949727376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,1.7436000506083171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,2.7172425587972007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,1.7000373204549153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,1.5211893717447917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,1.8445653915405273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,1.5098293622334797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,1.69488525390625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,1.6939199765523274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,0.9596213499704996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,0.9310773213704427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,1.4345653851826985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,0.8923253218332926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,1.7009600003560383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,0.903557300567627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,0.9551253318786621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,0.9600586891174316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,0.8963359991709391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,0.8954613208770752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,0.7856427033742269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,0.8991093635559082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,0.5768320163091024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,0.9074560006459554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,0.5341973304748535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,0.8022720019022623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,0.4819466670354207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,0.5121013323465983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,0.5081760088602701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,0.47410666942596436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,0.5194933414459229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,0.5120000044504801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,0.4724746545155843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,0.5162133375803629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,0.5206400156021118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,0.4687039852142334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,1.959386666615804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,2.3120106061299643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,2.3824426333109536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,1.9681599934895833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,2.417253335316976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,2.472287972768148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,2.3472906748453775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,1.288325309753418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,1.288325309753418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,1.1636959711710613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,1.3026666641235352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,2.0114506085713706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,2.429109255472819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,1.0446080366770427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,1.3952107429504395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,1.2356853485107422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,1.2311626275380452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,1.0509973367055256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,1.2206079959869385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,0.6864533424377441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,0.7031199932098389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,1.2254133224487305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,1.1396373112996419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,0.6488320032755533
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,0.6907947063446045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,0.6633280118306478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,0.6171520153681437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,0.6588906844456991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,0.6619733174641927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,0.605183998743693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,0.597269336382548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,0.6723413467407227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,0.6714826424916586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,0.4050079981486003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,0.4087306658426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,0.3689813216527303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,0.38522664705912274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,0.3877973159154256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,0.398911992708842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,0.3543253342310588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,0.387231985727946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,0.3519839843114217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,0.39052800337473553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,0.392410675684611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,0.35620800654093426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,2.603504021962484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,3.3205172220865884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,3.1173439025878906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,2.613706588745117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,3.2922185262044272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,3.3062079747517905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,3.2963733673095703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,3.306000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,1.6792853673299153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,1.6871253649393718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,1.4265333811442058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,1.7445920308430989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,2.651909351348877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,1.7287306785583496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,1.3923519452412922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,1.7391573588053386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,1.5958773295084636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,1.385983943939209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,0.9216213226318359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,0.8785226345062256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,1.5857173601786296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,1.368831952412923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,1.6384693781534831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,0.8612053394317627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,0.9531253178914388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,0.830736001332601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,0.8339520295461019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,0.7525333563486735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,0.8449013233184814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,0.7303360303243002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,0.8604640165964762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,0.8407466411590576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,0.48414401213328045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,0.4870773156483968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,0.46213865280151367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,0.7948160171508789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,0.4387199878692627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,0.45820800463358563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,0.4171200195948283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,0.45907731850941974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,0.4618186553319295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,0.47065067291259766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,0.4219733476638794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,0.47313066323598224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,0.4198773304621379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,0.29050666093826294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,0.29072000583012897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.26838932434717816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,0.27727999289830524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,0.27884799242019653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.2563626567522685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,0.2800160050392151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,0.27729066212972003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,0.27800534168879193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.2547360062599182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,0.2808319926261902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.25782400369644165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,1.8992479642232258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,1.614133358001709
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,1.8876852989196777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,1.8840266863505046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,1.620207945505778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,1.8920213381449382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,1.903781255086263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,1.0350879828135173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,0.9048266410827637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,1.0568959712982178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,0.983130693435669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,1.6479040781656902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,1.9715733528137207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,0.8904266357421875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,0.9744746685028076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,0.9853706359863281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,0.9883999824523926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,0.8573919932047526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,0.9829280376434326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,0.5516853332519531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,1.0009067058563232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,0.9068693319956461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,0.5571413437525431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,0.5092800060907999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,0.534986654917399
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,0.5203413168589274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,0.5283360083897909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,0.5188320080439249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,0.4694666862487793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,0.4669119914372762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,0.5295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,0.28782399495442706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,0.5314773321151733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,0.3163040081659953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,0.473690668741862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,0.31857067346572876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,0.2976800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.2728373408317566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,0.2940746744473775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,0.29331199328104657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.26703999439875287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,0.2959786653518677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,0.3029279907544454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,0.30265067021052044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.19474667310714722
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.27689067522684735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.1997119983037313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.19980265696843466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.1867413322130839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.19323732455571493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.17880533138910928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.1941493352254232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.19193067153294882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.1941546599070231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.17729600270589194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.19369065761566162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.17868266503016153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,1.9502347310384114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,1.6573066711425781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,1.9393332799275715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,1.9814507166544597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,1.6720159848531086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,1.9463839530944824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,1.969930648803711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,1.9841440518697102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,1.0424213409423828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,0.9758506615956625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,1.0917119979858398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,0.9730239709218343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,1.7680373191833496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,0.9843093554178873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,0.9378933111826578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,0.9899306297302246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,0.9758133093516032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,0.8745919863382975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,1.005130688349406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,0.551578680674235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,0.9981386661529541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,0.8752533594767252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,0.5178879896799723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,0.5604586601257324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,0.4957066774368286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,0.5082453489303589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,0.4565226634343465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,0.5156799952189127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,0.457477331161499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,0.5298666556676229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,0.5148586829503378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,0.5250453154246012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,0.4656533400217692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,0.3052266637484233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,0.2804373304049174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,0.3086880048116048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,0.27811199426651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,0.2847306728363037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,0.28006933132807416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.25622399648030597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,0.28173333406448364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.25915733973185223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,0.28989867369333905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,0.2909226616223653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.263589342435201
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.1816693345705668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.18334933121999106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.16665599743525186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.16852800051371256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.16930667559305826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.15227199594179788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.16847467422485352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.16931732495625815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.1523306667804718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.11893866459528606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.17067732413609824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.17112000783284506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.15607999761899313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.11784533659617107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.10944533348083496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.11616533001263936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.11556266744931538
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.10710933804512024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.11714133620262146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.11628266175587972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.10705600182215373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.1162453293800354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.11653332908948262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.10806933045387268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,1.22270401318868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,1.2386986414591472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,1.071893294652303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,1.2282880147298176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,1.2336266835530598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,1.0838507016499836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,1.254479964574178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,0.7079626719156901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,1.2568373680114746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,0.6918666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,1.1312373479207356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,0.6155413389205933
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,0.6277173360188802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,0.6336693366368612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,0.5707146724065145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,0.6469759941101074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,0.6442293326059977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,0.6338026523590088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,0.5601760149002075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,0.6477813323338827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,0.3329813281695048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,0.36666667461395264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,0.5734506845474243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,0.3726453383763631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,0.3350026607513428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,0.33590400218963623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,0.30106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,0.3378666639328003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,0.34055999914805096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,0.30481600761413574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,0.20935465892155966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,0.34517868359883624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,0.3462560176849365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,0.31074132521947223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,0.2078346610069275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.19113600254058838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.18523200352986655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.18858667214711508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.1946986714998881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.170415997505188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.1898933251698812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.12837866942087808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.18870933850606283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.17312000195185342
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.19767467180887857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.17909866571426392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.12743999560674033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.11967999736467998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.11982933680216472
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.12190933028856914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.12270399928092957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.10872532924016316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.11994133392969768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.07871466875076294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.12218133608500163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.10905067125956218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.12079999844233195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.11110933621724446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.07737066845099132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.07444266478220622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.07850666840871175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.07934933404127757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.07362133264541626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.07858666777610779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.0732586681842804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.07841066519419353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.07797333101431529
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.07231466472148895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,1.3348426818847656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,1.1790560086568196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,1.3498667081197102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,1.3552160263061523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,1.3672159512837727
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,1.1899840037027996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,1.3650986353556316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,0.7597493330637614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,0.7650187015533447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,1.383248011271159
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,0.682533343633016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,1.2218560377756755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,0.6803359985351562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,0.6861546834309896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,0.6871946652730306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,0.6069279909133911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,0.6114720106124878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,0.6995999813079834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,0.6991360187530518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,0.39638932545979816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,0.35949865976969403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,0.400490681330363
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,0.7043840090433756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,0.625274658203125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,0.356389323870341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,0.3606826861699422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,0.3213813304901123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,0.35941867033640545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,0.36323734124501544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,0.3240106701850891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,0.3662186861038208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,0.37141335010528564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.19474132855733237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,0.32957865794499713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,0.21791466077168783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,0.2222986618677775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.19883733987808228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.19646932681401572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.17724267641703287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.1987733244895935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.1276853382587433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.19877866903940836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.18038400014241537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.20360000928243002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.20551466941833496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.10193600257237752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.18263467152913412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.12851732969284058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.11910933256149292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.11477333307266235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.11515733599662781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.11452266573905945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.11409599582354228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.10222933689753215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.0746559997399648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.11539733409881592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.11718400319417317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.10727999607721965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.08028266827265422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.07042133311430614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.07713599999745686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.07798933486143748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.0705973356962204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.07794666786988576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.07835733393828075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.07209066549936931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.0535093347231547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.05418133238951365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.056736002365748085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.05635199944178263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.058042665322621666
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,0.9355893135070801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,0.9341440200805664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,0.819434642791748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,0.944159984588623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,0.9494453271230062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,0.82968537012736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,0.9588906764984131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,0.526581327120463
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,0.9634293715159098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,0.5354880094528198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,0.4769333203633626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,0.8457386493682861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,0.47537068525950116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,0.4771626790364583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,0.4179146687189738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,0.47675732771555585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,0.4786613384882609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,0.42156267166137695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,0.4872266848882039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,0.4910240173339844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,0.27992000182469684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,0.4341866572697957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,0.2823626597722371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,0.2544533411661784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.24889065821965536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.2518720030784607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.22408533096313477
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,0.25178666909535724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,0.2542293270428975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.226090669631958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,0.25861867268880206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,0.2606613238652547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.23341333866119385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.1562933325767517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.1586133340994517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.14383999506632486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.1339359978834788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.12734933694203696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.13569066921869913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.12258133292198181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.1381280024846395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.13890133301417032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.14338666200637817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.14353600144386292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.13157332936922708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.09056533376375835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.09223467111587524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.08681600292523702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.07534400125344594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.08475200335184734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.08552533388137817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.075162669022878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.08451732993125916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.08400533596674602
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.0844640036424001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.08562133709589641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.07633066674073537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.055829331278800964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.05163733164469401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.0537066658337911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.053674668073654175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.05438933273156484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.04903466502825419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.04488533238569895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.047322665651639305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.048341333866119385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,0.971887985865275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,0.9714186986287435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,0.9118506908416748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,0.9925920168558756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,0.993168036142985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,0.9374720255533854
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,1.008570671081543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,0.5621279875437418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,0.5532693465550741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,1.001482645670573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,0.497381329536438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,0.4925066630045573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,0.9695466359456381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,0.5355999867121378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,0.468666672706604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,0.5054239829381307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,0.504965345064799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,0.4758506615956624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,0.5161919991175333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,0.5106186469395956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,0.29574400186538696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,0.25759466489156085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,0.28957333167394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,0.48609066009521484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,0.27716267108917236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,0.25806933641433716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.24117867151896158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,0.2701066732406616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,0.26574933528900146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,0.26500799258550006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.24832000335057577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.1502133309841156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,0.2671626607577006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.139765332142512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.25385065873463947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.16082132856051126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.1439359982808431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.1585813363393148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.14078399538993835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.12640532851219177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.14276267091433206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.13512532909711203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.09088533123334248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.1469386617342631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.14615999658902487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.1369493305683136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.07282666862010956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.09426132837931316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.08133333424727122
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.08177066842714946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.08276266853014629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.08229333162307739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.072543998559316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.08245866497357686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.0851093331972758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.05439466734727224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.05778666834036509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.05213333169619242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.055386667450269066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.05507733424504598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.049839998284975685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.04941866795221964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.03860799968242645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.037274666130542755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.037274666130542755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.031082667410373688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,0.8358933130900065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,0.8322133223215739
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,0.7886506716410319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,0.859392007191976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,0.8540799617767334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,0.807802677154541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,0.8693973223368326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,0.4905173381169637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,0.4803146521250407
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,0.8602240085601807
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,0.8454399903615316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,0.4707039992014567
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,0.42631999651590985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,0.4264479875564575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,0.4038879871368408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,0.4151839812596639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,0.4360320170720418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,0.4362666606903076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,0.4448266824086507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,0.44226133823394775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,0.2593013246854146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,0.4203253189722697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.20763200521469116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,0.2525706688563029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,0.24480533599853516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.22248532374699911
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.22206934293111166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.22838934262593588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.2278560002644857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.21754666169484457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.23365867137908936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.23097066084543863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.22065067291259766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.14169599612553915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.13740266362826029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.13251733779907227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.12098667025566101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.12339733044306438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.12430399656295776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.11752000451087952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.1267733375231425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.126202662785848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.1202826698621114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.07966400186220805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.07856533428033192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.07719466586907704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.07045333087444305
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.07039999961853027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.06862399975458781
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.06260799864927928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.06282666822274525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.07035733262697856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.07086933155854543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.04929600159327189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.04613866905371348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.046282668908437095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.046240001916885376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.02815466622511546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.02232533444960912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,0.38339734077453613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,0.38197867075602215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,0.3656586805979411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,0.3924959897994995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,0.4010719855626424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,0.399344007174174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,0.3911466598510742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,0.37491734822591144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,0.2390986680984497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,0.38470399379730225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,0.23149865865707397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.2071253259976705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,0.22612800200780234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.19903467098871866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.19946134090423584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.18811200062433878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.20495466391245523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.19683200120925903
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.210698664188385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.12358933687210083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.20937599738438925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.20229333639144897
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.13053866227467856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.12763733665148416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.11102400223414104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.09916266798973083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.1132533351580302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.11319999893506368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.11662399768829346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.1150986651579539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.1114026705423991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.06247999767462412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.07421866556008656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.0709440012772878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.0551093320051829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.06295999884605408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.0633546660343806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.06460266808668773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.06368533273537953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.04444266855716705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.0454720010360082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.04348266621430715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.040378667414188385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.037962667644023895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.04271999994913737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.04342400034268697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.022111999491850536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.21408534049987793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.21343467632929483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.20460800329844156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.2195146679878235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.220085342725118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.2141866683959961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.22301334142684937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.22125333547592163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.13377599914868674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.21654399236043295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.1308746635913849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.1299199958642324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.11654933293660481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.11548800269762675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.10597866773605347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.11824533343315125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.07544533411661784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.11916266878445943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.11401599645614624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.12097600102424622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.12001599868138631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.11724266409873962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.07446399827798207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.07620800038178761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.06726933519045512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.0673226664463679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.06808533271153767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.06715733309586842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.059749335050582886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.06806933383146922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.0674186646938324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.0629120022058487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.036805334190527596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.04252799848715464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.042730664213498436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.04082666585842768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.03728533287843069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.03091199944416682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.030778666337331135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.029978667696317036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.017978666971127193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.1442400018374125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.1434879998366038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.13224533200263977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.1461066703001658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.14492799838383993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.1397546629110972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.14820266763369241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.14793066183725992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.14225600163141885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.08887466788291931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.08646399776140849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.07935466865698497
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.08055466910203297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.07075199981530507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.08061333497365315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.08063999811808269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.08077866832415263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.0727893312772115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.04674666623274485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.08171733220418294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.0516533354918162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.04929600159327189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.04826133449872335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.045370668172836304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.03086400032043457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.0308693324526151
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.030794667700926464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.030832000076770782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.03242133309443792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.02462399999300639
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.09949866930643718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.10935466488202412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.10945600271224976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.10886399944623311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.11051733295122783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.10084799925486247
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.1107360025246938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.10941867033640544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.06459733347098033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.061903998255729675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.06526400148868561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.06206933160622915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.057818666100502014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.0629120022058487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.05810666580994924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.0393653338154157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.035887998839219414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.03717333326737086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.04165333261092504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.04106666644414266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.028090665737787884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.02019199977318446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.09220799803733826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.09098666906356812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.08647466699282329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.09155199925104777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.09241066376368205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.08683199683825175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.0912000040213267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.08754666646321614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.05031999945640564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.055071999629338585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.05026133358478546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.05351999898751577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.052890668312708534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.05494933327039083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.051856001218159996
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.03504000107447306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.03499199946721395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.03645866612593333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,3.005253473917643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,3.7970558802286782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,3.873920122782389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,3.0494346618652344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,3.7373971939086914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,3.941706657409668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,4.070826530456543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,1.96669340133667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,1.9451200167338054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,1.90119473139445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,1.890394687652588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,3.0979200998942056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,3.9046398798624673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,1.9261706670125325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,1.760650634765625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,1.5989119211832683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,1.853775978088379
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,1.9187946319580078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,1.0421173572540283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,1.911893367767334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,1.8897813161214192
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,1.7617866198221843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,0.9067786534627279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,1.0516479810078938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,1.0195146401723225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,1.090336004892985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,0.8985333442687988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,1.0455786387125652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,0.8833333651224772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,1.036186695098877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,1.0197813510894775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,0.6059466600418091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,1.0365440050760906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,0.8955039978027344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,0.5990879933039347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,0.570522665977478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,0.5778719981511434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,0.5786933501561483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,0.5284693241119385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,0.5840906699498495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,0.5879146655400594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,0.5267733335494995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,0.5864799817403158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,0.5900106827418009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,0.5297653277715048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,2.284735997517904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,1.818709373474121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,2.2287680308024087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,2.2653172810872397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,1.8333706855773926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,2.232144037882487
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,2.212303956349691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,2.2838026682535806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,1.1732746760050456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,1.197013298670451
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,1.092682679494222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,1.1261173089345295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,2.0760374069213867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,1.2804426352183025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,1.0542826652526855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,1.273861328760783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,1.159237305323283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,1.0109919706980388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,1.1582187016805012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,0.677903970082601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,1.1436213652292888
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,1.017690658569336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,0.6567413409550985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,0.6142239967981974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,0.6557546854019165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,0.6175786654154459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,0.6221866607666016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,0.5773066679636637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,0.6206933259963989
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,0.5539573431015015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,0.6313600142796835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,0.631162683169047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,0.38067201773325604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,0.5731360117594401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,0.3651839892069499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,0.3377813498179118
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,0.3829600016276042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,0.35516266028086346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,0.3675040006637573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,0.3657066822052002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,0.3686666488647461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,0.3380746841430664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,0.36906667550404865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,0.3709919850031535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,0.34352533022562665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,1.5823839505513508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,1.327781359354655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,1.5439252853393555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,1.595370610555013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,1.339087963104248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,1.5801173845926921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,1.5709279378255208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,1.5814879735310872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,0.8559040228525797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,0.8695573012034098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,1.377669334411621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,0.8351786931355795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,0.81605331103007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,0.8338720003763834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,0.7801226774851481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,0.8323787053426107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,0.8275626500447592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,0.7295573552449545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,0.837994654973348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,0.48811201254526776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,0.833791971206665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,0.7521759668986002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,0.4863733450571696
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,0.43331201871236164
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,0.4232693513234456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,0.4599253336588542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,0.45706133047739667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,0.4630933205286662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,0.4590826829274495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,0.41861867904663086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,0.42267199357350665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,0.46670933564503986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,0.47038400173187256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,0.296122670173645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,0.2954346736272176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.264629324277242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.2739306688308716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,0.285861333211263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,0.28697067499160767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,0.28785600264867145
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,0.28913599252700806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.2670666575431824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,0.2886613408724467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.26443199316660565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,0.290175994237264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,2.079098701477051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,1.7542346318562825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,2.108959992726644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,2.1526986757914224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,1.769877274831136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,2.1353759765625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,2.09769598642985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,2.1787145932515464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,1.1066346963246663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,1.2209866841634114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,1.131930669148763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,1.0547893047332764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,1.9207413991292317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,1.0870719750722249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,1.0766133467356365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,1.0765706698099773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,1.0681440035502117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,0.9564747015635172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,0.6115093231201172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,1.099519968032837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,0.6098346710205078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,1.0874826908111572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,0.9562346935272217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,0.5621973276138306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,0.5882933139801025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,0.5735306739807129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,0.5158240000406901
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,0.5774399836858114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,0.573802669843038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,0.5261653264363607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,0.5792746543884277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,0.5818826754887899
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,0.5337973435719808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,0.34162668387095135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,0.34465599060058594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,0.3131999969482422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,0.321669340133667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,0.29924799998601276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,0.3224266568819682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.2932906746864319
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,0.32365866502126056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,0.3267786701520284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.2196000019709269
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,0.33229867617289227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,0.3304213285446167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,0.3036053379376729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.2161653240521749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.20254933834075928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.21318932374318442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.21369065841039023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.1963520050048828
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.2118026614189148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.2109066645304362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.19593600432078043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.213754673798879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.21282132466634116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.19776000579198202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,1.250597318013509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,1.0927626291910808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,1.256442705790202
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,1.2554346720377605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,1.2688586711883545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,1.1029226779937744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,1.2831093470255535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,0.7001492977142334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,0.730400005976359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,1.2827733357747395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,1.1316160360972087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,0.653434673945109
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,0.5802719990412394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,0.6605066855748495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,0.6649493376413981
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,0.6658986806869507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,0.6751840114593506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,0.5870826641718546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,0.6716266473134359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,0.6793599923451742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,0.3816373348236084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,0.5936319828033447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,0.3854026794433594
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,0.3503093322118123
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,0.3545600175857544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,0.3574666579564412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,0.32661867141723633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,0.36212801933288574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,0.3632320165634155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,0.3288960059483846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,0.36764800548553467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,0.37066133817036945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,0.3343520164489746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,0.22386133670806885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,0.22512000799179077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.20826667547225952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.21211733420689902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.21079999208450317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.21307732661565146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.21632534265518188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.19335466623306274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.21382933855056763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.1933493415514628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,0.21719467639923096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.2007146676381429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.1471680005391439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.1476853291193644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.13758400082588196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.14404799540837607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.1418773333231608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.1332533359527588
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.14351466298103333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.14633066455523172
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.13365866740544638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.1461013356844584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.14390400052070618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.13437333703041077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,1.2738933563232422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,1.2759839693705242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,1.1186933517456055
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,1.2818506558736165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,1.1341119607289631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,1.3071040312449138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,1.2983626524607341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,1.3274292945861816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,0.7112533251444498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,0.7119839986165365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,1.1767146587371826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,0.6307466824849447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,0.6512373288472494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,0.664352019627889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,0.6627413431803385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,0.6241600116093954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,0.6705439885457357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,0.5916213194529215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,0.3791786829630534
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,0.6728479862213135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,0.6743626594543457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,0.6007466713587443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,0.3798186779022217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,0.3445066610972087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,0.3489760160446167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,0.3476266860961914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,0.31828800837198895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,0.35204799969991046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,0.35286935170491535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,0.3206719954808553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,0.3605813185373942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,0.3601440191268921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,0.32654400666554767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,0.21294933557510376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,0.21584532658259073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.19744000832239786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.1967680056889852
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.19568000237147012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.17962666352589926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.19834667444229126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.19949867328008017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.18506133556365967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.20399999618530273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.2049600084622701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.19023466110229492
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.13293866316477457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.13386133313179016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.12570666273434958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.12717866897583008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.12818132837613425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.11693333586057027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.12744533022244772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.1301759978135427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.11760000387827556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.08653333783149719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.12854933738708496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.12844266494115195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.08462933699289958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.11935999989509583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.08686932921409607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.08437333504358928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.08495466907819112
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.08522666494051616
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.08480000495910645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.0865760048230489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.08062933385372162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,0.8061280250549316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,0.8201493422190348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,0.7204000155131022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,0.8231840133666992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,0.835536003112793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,0.7358240286509196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,0.8388373057047526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,0.45865599314371747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,0.8416799704233805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,0.7543253103892008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,0.46562135219573975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,0.4246400197347005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,0.4188213348388672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,0.42080533504486084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,0.380079984664917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,0.4248480002085368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,0.43372801939646405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,0.3863573471705119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,0.44173868497212726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,0.43852798144022626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,0.22826667626698813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,0.39558398723602295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.2129866679509481
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,0.25121599435806274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,0.2540106574694316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.21548799673716226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,0.23151999711990356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,0.22946133216222128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.14563733339309692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,0.23360000054041544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.14891733725865683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,0.23455999294916788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,0.24006932973861694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.13372266292572021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,0.24373332659403482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.22126400470733643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.1389226714769999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.13434666395187378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.12165866295496623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.13499200344085693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.13564266761144003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.1239946683247884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.13797332843144736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.1404266655445099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.12985066572825113
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.09397332866986592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.0946613351504008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.08682133754094441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.09060800075531006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.09087466200192769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.09319466352462769
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.08471999565760295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.06773333251476288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.08258133133252461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.08986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.08444799979527791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.09160533547401428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.06850666801134746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.0660693347454071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.0664106657107671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.06638399759928386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.06832533578077953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.06740266581376393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.06819200019041698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.06363200147946675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.06666133304437001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.06394133468468984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,0.8956267038981119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,0.9021066824595133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,0.7967893282572428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,0.9102826913197836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,0.9211839834849039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,0.8119893074035645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,0.9340906937917074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,0.5051573514938354
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,0.9368586540222168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,0.8360693454742432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,0.5103093385696411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,0.46124267578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,0.45686932404836017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,0.45577065149943036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,0.4111040035883586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,0.4635466734568278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,0.4654346704483032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,0.4190986553827922
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,0.47669867674509686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,0.48214932282765705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,0.4333706696828206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,0.2694399952888489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,0.2720853288968404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,0.24823466936747232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,0.2418559988339742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,0.24367467562357584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.2227519949277242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,0.25514666239420575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,0.24489066998163858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,0.24883200724919638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.22804266214370728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,0.2553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.2344800035158793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.1513759990533193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.1544266641139984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.14110933740933737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.1323360006014506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.13355732957522073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.12365333239237468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.13516799608866373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.12779200077056885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.08621866504351298
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.1434453328450521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.14378666877746582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.13436800241470337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.08988799651463826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.09084799885749817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.07794133325417836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.08453333377838135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.0848640004793803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.07648000121116638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.08509332935015361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.08547733227411906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.0554720014333725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.07938666641712189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.057487999399503074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.05199466645717621
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.055344000458717346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.04961599906285604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.04748799900213877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,0.6184800068537394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,0.623141328493754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,0.5531306664148966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,0.6267840067545573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,0.6308106581370035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,0.5659626722335815
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,0.6390026807785034
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,0.6414239803949991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,0.3537866671880086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,0.3147573272387187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,0.5752053260803223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,0.3573919932047526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,0.3250666658083598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,0.31699200471242267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.2890933354695638
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,0.3339680035909017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,0.3206613262494405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,0.3237813313802083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,0.2927199999491374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,0.3279306689898173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.16845333576202393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,0.30186132589975995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.19104532400767008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.1956160068511963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.17827733357747397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.16926399866739908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.1578986644744873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.17262399196624756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.17441066106160483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.160453329483668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.17847466468811035
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.18069867293039957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.16636799772580466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.09494933485984802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.10845866799354553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.10999466975529988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.09531199932098389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.08623466889063518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.09982400139172871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.09624532858530681
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.09688533345858256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.0886346697807312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.09912533561388652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.09637866417566936
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.06439466774463654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.06600533425807953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.06163733204205831
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.06404266754786174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.05773333211739858
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.06419200201829274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.057114665706952415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.06310933331648509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.06420266628265381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.046911999583244324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.0447573314110438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.045109331607818604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.04550399879614512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.043696001172065735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.04229333500067393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,0.6366826693216959
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,0.634607990582784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,0.6112586657206217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,0.6434346834818522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,0.6406826575597128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,0.6106773217519125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,0.6720960140228271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,0.6698453426361084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,0.37014933427174884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,0.36189866065979004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,0.6811839739481608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,0.3571306864420573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,0.32679466406504315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,0.3265226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.31798932949701947
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,0.3363200028737386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,0.3362133502960205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,0.33191466331481934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,0.3207040031750997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.19234132766723633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,0.3487253189086914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,0.3445386489232381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,0.19782400131225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,0.1948853333791097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.17625067631403604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.1759200096130371
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.16768532991409302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.17935999234517416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.18065067132314047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.17263466119766235
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.18812799453735352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.18513067563374838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.17780800660451254
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.11143466830253601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.11111467083295186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.10689600308736165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.09709866841634114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.09698133667310078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.09028266867001851
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.10044800241788228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.09874133268992107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.06325866778691609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.0916426678498586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.06038933495680491
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.10371200243631999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.10392000277837117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.09909866253534953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.06393600006898244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.0639519989490509
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.061674664417902626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.06159466505050659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.05844266712665558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.03728533287843069
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.03889599939187368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.0321066677570343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.032511999209721885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.029029332101345062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.030261332790056866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,0.5455360015233358
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,0.540890653928121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,0.5269706646601359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,0.5560693343480428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,0.5492639938990275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,0.5392320156097412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,0.5786933501561483
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,0.5735840002695719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,0.3200533390045166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,0.5837226708730062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,0.313210666179657
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,0.31408532460530597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,0.2800266742706299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,0.28089600801467896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.27189334233601886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,0.2885599931081136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,0.28460800647735596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,0.290608008702596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.27934932708740234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,0.3004426757494609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,0.2961813410123189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,0.17335466543833414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.1699946721394857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.16672533750534058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.1511146624883016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.1502026617527008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.14416533708572388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.15452266732851663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.15337066849072775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.1481706698735555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.1612320045630137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.16080533464749655
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.15412267049153647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.09714133540789287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.09492266178131104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.09353599945704143
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.0830506682395935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.07445866862932841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.08451732993125916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.08290133376916249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.080485333998998
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.08973866701126099
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.0888159970442454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.08644800384839375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.05369600156943003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.05064000189304352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.05578133463859558
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.05020800232887268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.04762666424115499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.0517546683549881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.03252800057331721
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.02367466688156128
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.25165865818659466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.25036799907684326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.24697067340215048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,0.2636106610298157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,0.27213867505391437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,0.2654506762822469
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,0.26027733087539673
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.2541653315226237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.1355946660041809
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,0.27699732780456543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.12839999794960022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,0.15678399801254272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.15312000115712485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.15682133038838705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.13261333107948303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.14008000493049622
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.13799466689427695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.13165332873662314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.1479146679242452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.14518400033315024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.07461866736412048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.14173332850138345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.07004266480604808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.09002133210500081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.0802400012811025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.07437333464622498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.07630933324495952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.07349333167076111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.08229866623878479
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.045509333411852516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.04578666885693868
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.0413973331451416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.044079999128977455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.028512001037597656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.02056533346573512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.1442026694615682
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.14195199807484946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.1411893367767334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.1481760044892629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.1440000037352244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.14220266540845236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.1571466624736786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.09035733342170715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.1537866691748301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.07633600135644276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.15127999583880106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.07933866480986278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.07826133569081624
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.09053867061932881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.0888853371143341
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.07841599980990092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.049322664737701416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.08618666728337605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.08442667126655579
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.08261866867542267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.04981866478919983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.04942933221658071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.045968001087506614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.04663999875386556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.04826133449872335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.029898665845394135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.020021333048741024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.0988106628259023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.0946720043818156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.0988159974416097
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.10046933094660442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.09909866253534953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.10485866665840149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.10268800457318623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.05807466804981232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.10147733489672343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.05796800057093302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.05003199974695841
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.037018666664759316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.05806399881839752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.057034666339556374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.03490666548411051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.03726933399836222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.07687999804814656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.07043200234572093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.07687999804814656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.07213333249092102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.07751999795436859
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.07698666552702586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.07502933343251546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.045594667394955955
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.046682665745417275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.046240001916885376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.04649066428343455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.03107733279466629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.03011200080315272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.022485333184401195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.019808000574509304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.020266667008399963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.06675733129183452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.062362665931383766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.06622399886449178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.06774400174617767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.06388266881306966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.06678933401902516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.04057066639264425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.04053866614898046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.02693866689999898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.027823999524116516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.020618667205174763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.019813333948453266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.020314666132132213
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,1.9161067008972168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,1.5816106796264648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,1.8753867149353027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,1.0132160186767578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,1.0282453695933025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,1.8628053665161133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,1.961674690246582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,1.5908692677815754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,0.943727970123291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,0.9900853633880615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,0.8649866580963135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,1.0178399880727131
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,1.008021354675293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,0.582693338394165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,1.0892000198364258
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,0.8994932969411215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,0.6045226653416952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,0.5175893306732178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,0.5573386748631796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,0.5625120004018148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,0.5048906803131104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,0.5623733202616373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,0.5738133192062378
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,0.36157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,0.514789342880249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,0.3609706560770671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,0.33214932680130005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,0.35683735211690265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,0.3598666588465373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,0.3227733373641968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,0.35645333925882977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,0.3591466744740804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,0.32435200611750287
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,1.1190026601155598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,1.1206666628519695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,0.9697866439819336
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,1.1173386573791504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,0.6270613272984823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,1.1298720041910808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,0.9803040027618408
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,0.638213316599528
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,0.559002677599589
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,0.6103786627451578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,0.6120479901631674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,0.5422240098317465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,0.654362678527832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,0.6150933504104614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,0.36512001355489093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,0.5491893291473389
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,0.35151998202006024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,0.3669546842575073
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,0.3364479939142863
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,0.36735999584198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,0.3203253348668416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,0.35600535074869794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,0.23783999681472778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,0.32676267623901367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,0.35282135009765625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,0.23428799708684286
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,0.23463465770085654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.21824532747268677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,0.2339306672414144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.21387199560801187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,0.2342346707979838
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.2130720019340515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,0.23543467124303183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,0.8251679738362631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,0.8140587011973063
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,0.7156853675842285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,0.8211413224538168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,0.46614400545756024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,0.8211092948913574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,0.7262187004089355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,0.4718240102132161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,0.4209333260854085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,0.45420801639556885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,0.4479893445968628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,0.4078773260116577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,0.4550559918085734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,0.4554026524225871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,0.41012799739837646
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,0.28387200832366943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.24839999278386435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,0.28514667352040607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.2618933320045471
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,0.2749600013097127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,0.27591999371846515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,0.27556800842285156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,0.277402659257253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.25279466311136883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.17667200167973837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.1765013337135315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.1789919932683309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.1625226636727651
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.17624000708262125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.16064533591270447
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.1750133236249288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.1770026683807373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.1609280010064443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,1.0630613168080647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,1.0763253370920818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,0.9268213113149008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,1.0716319878896077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,0.5912853479385376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,1.068837324778239
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,0.9366986751556396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,0.5879093408584595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,0.5368746519088745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,0.5666666825612386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,0.5746293465296427
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,0.5639626582463583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,0.5008373260498047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,0.5724639892578125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,0.3299039999643962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,0.5090719858805338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,0.33470932642618817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,0.3020586570103963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,0.3147466580073039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,0.31565866867701214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.2052746613820394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.2900159955024719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,0.3194986581802368
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.2029119928677877
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,0.3231253425280253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,0.29579732815424603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.20593067010243735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.1925493280092875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.20163200298945108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.18516800800959268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.13371733824412027
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.18386665980021158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.20246400435765585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.20357867081960043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.13435733318328857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.12569066882133484
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.13191999991734824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.13396267096201578
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.12444266676902771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.1337440013885498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.13401599725087485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.12553600470225015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,0.6605706612269083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,0.6577920118967692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,0.5849973360697428
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,0.6669279734293619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,0.6677013238271078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,0.3763466676076253
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,0.594709316889445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,0.3792693217595418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,0.3397973378499349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,0.3569920063018799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,0.35503466924031574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,0.32916800181070965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,0.32237333059310913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,0.363322655359904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,0.3637760082880656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,0.21625600258509317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,0.2179786761601766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.2007146676381429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.20516266425450644
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.20524267355600992
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.19339199860890707
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.18683199087778726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.20941867431004843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.20973332722981772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.13693867127100626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.13845866918563843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.12762133280436197
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.13711466391881308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.13808533549308777
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.12444800138473511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.13801599542299905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.13589333494504294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.1237546702226003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.10319999853769939
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.10322667161623637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.10526399811108907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.09893866380055745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.096778670946757
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.10317867000897725
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.1032373309135437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.09627733627955119
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.10520000259081523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,0.6646933158238729
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,0.6641013224919637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,0.5940800110499064
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,0.6063733498255411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,0.3779040177663167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,0.6790133317311605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,0.6806026299794515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,0.3741653362909953
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,0.3387146790822347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,0.3490133285522461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,0.3533866802851359
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,0.31908265749613446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,0.21195733547210693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,0.35804800192515057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,0.3609280188878377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,0.32686400413513184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,0.21147199471791586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.1954773267110189
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.19335466623306274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.19527999560038248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.179530660311381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.2009119987487793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.20278932650883993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.1239946683247884
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.18634132544199625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.12850667039553323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.12610133488972983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.12780267000198364
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.12070399522781372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.12243200341860454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.11204266548156738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.0786240001519521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.12405866384506226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.11351466178894043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.08067200084527333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.08172800143559773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.08056533336639404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.07871466875076294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.0804746647675832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.07553066809972127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.07419733206431071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.07032533486684163
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.07445333401362102
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.07419733206431071
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.07017066578070323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.0743146687746048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.068271999557813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,0.4355893135070801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,0.43780267238616943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,0.3938719828923543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,0.4475413163503011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,0.44780266284942627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,0.25305600961049396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,0.4045013189315796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,0.254805326461792
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,0.23256532351175943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,0.23388266563415527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,0.23456533749898276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.21460266908009848
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,0.24043200413386026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,0.24220265944798788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.2188640038172404
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.14550933241844177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.1471680005391439
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.13566933075586954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.13666666547457376
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.13146666685740152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.1322879989941915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.12052800258000691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.13760000467300415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.12732799847920737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.08833600083986919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.09093333284060161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.08277333279450734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.0867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.08847999572753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.08002133170763652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.06404266754786174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.0881760021050771
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.07926933467388153
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.06407466530799866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.06400533517201741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.06422399977842967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.058602665861447654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.06438933312892914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.05957333246866862
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.06121600170930227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.05575466652711233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,0.4994613329569499
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,0.5010826587677002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,0.44973333676656085
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,0.5083093245824178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,0.5092000166575114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,0.2834239999453227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,0.45771201451619464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,0.28248000144958496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,0.25845867395401
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,0.257152001063029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,0.25854400793711346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.2357813318570455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.14495467146237692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,0.26741333802541095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,0.26808534065882367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.24321067333221436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.15618133544921875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.15738133589426676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.13797332843144736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.13589866956075033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.13666133085886636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.1295253336429596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.0842186709245046
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.14621333281199136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.14621866742769876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.08917867143948872
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.08914666374524434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.08637866377830505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.08584533135096233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.0746506651242574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.08455999692281087
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.07761066655317943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.05460800230503082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.05503466725349426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.04952533543109894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.05379733443260193
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.05018133421738943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.04775466521581014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,0.3298986752827962
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,0.3312266667683919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,0.29764799276987713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,0.3349706729253133
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,0.33987732728322345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.1932906707127889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,0.3073493242263794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.19698667526245117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.17994133631388345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.1772480010986328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.1765013337135315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.16120533148447672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.18179200092951456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.18200532595316568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.168938676516215
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.10812266667683919
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.11125333110491435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.10310399532318115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.09563199679056804
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.09528533617655437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.09700799981753032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.08954133590062459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.061941335598627724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.09774399797121684
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.10084266463915507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.0654666672150294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.04567466676235199
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.06316266457239787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.043231998880704246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.04033066580692927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.04048000027736028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.039034667114416756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,0.3471519947052002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,0.34353065490722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,0.33847999572753906
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,0.19646932681401572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,0.3527626593907674
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,0.3479679822921753
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,0.348688006401062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.1810986598332723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,0.19839467604955038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.19324266910552979
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.18406933546066284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.18288000424702963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.1789813240369161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.18718934059143066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.09753599762916565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.1118239959081014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.18475200732549033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.10921066999435425
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.10778133074442546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.09868799646695454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.06228266656398773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.09469333291053772
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.10257599751154582
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.1019040048122406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.06364800035953522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.0622026671965917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.06127466758092245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.03305600086847941
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.03217600037654241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.03317866722742716
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.03046400099992752
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.02849599967400233
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.297925333182017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,0.29772265752156574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,0.2937386631965637
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,0.3068959911664327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,0.302784005800883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,0.1721173326174418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.15503467122713724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.1717066764831543
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,0.3025439977645874
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.16832532485326132
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.15813866257667542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.15753600001335144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.16268799702326456
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.16023466984430948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.16033599774042764
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.09809600313504536
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.09475732843081157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.09397866328557332
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.08339732885360718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.08378666639328003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.08175999919573466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.08518399794896443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.05312533179918925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.049839998284975685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.05035200218359629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.04750399788220724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.049866666396458946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.051632001996040344
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.02699733277161916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.026501332720120747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.022517333428064983
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.13803733388582864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.1356000006198883
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.14661332964897156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.1458506683508555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.13986133535703024
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.14904533823331198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.08957333366076152
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.08923733234405518
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.08644800384839375
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.07428266604741414
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.07417599856853485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.07791466514269511
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.07725866635640462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.08266133566697438
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.04752533137798309
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.02056533346573512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.08006933331489563
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.08140266438325246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.07890133559703827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.0792799989382426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.08029333253701527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.08516800403594971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.05030933519204458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.047423998514811196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.04645333190759023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.04621333380540212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.056885331869125366
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.05585599939028422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.03631466627120972
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.046336000164349876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.030773334205150604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.028768000503381092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.040618665516376495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.038106667498747505
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,float16,0,0.98908797899882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,fp8,0,1.003775993982951
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,float16,0,0.5657866795857748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16384,2,1,128,0,1,fp8,fp8,0,0.8617599805196127
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,fp8,0,0.5689386526743571
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,2,128,0,1,fp8,fp8,0,0.5075786511103312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,float16,0,0.5592000087102255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,fp8,0,0.5564159949620565
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,float16,0,0.3479573329289754
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,2,1,128,0,1,fp8,fp8,0,0.5150826772054037
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,fp8,0,0.3505386511484782
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,2,128,0,1,fp8,fp8,0,0.3220799962679545
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,float16,0,0.3470613161722819
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,fp8,0,0.22915732860565186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,fp8,0,0.3465973138809204
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,2,1,128,0,1,fp8,fp8,0,0.31354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,float16,0,0.2302239934603373
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,2,128,0,1,fp8,fp8,0,0.21357333660125732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,float16,0,0.22850666443506876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,fp8,0,0.23015999794006348
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,2,1,128,0,1,fp8,fp8,0,0.21018133560816446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,float16,0,0.6079733371734619
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,fp8,0,0.36130134264628094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,fp8,0,0.6090826590855917
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,12288,2,1,128,0,1,fp8,fp8,0,0.5410720109939575
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,float16,0,0.3585333426793416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,fp8,0,0.3485706647237142
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,2,128,0,1,fp8,fp8,0,0.3261546691258748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,float16,0,0.34909331798553467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,2,1,128,0,1,fp8,fp8,0,0.3186239997545878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,fp8,0,0.228112002213796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,float16,0,0.22780799865722656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,fp8,0,0.22963200012842813
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,2,128,0,1,fp8,fp8,0,0.20816532770792642
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,float16,0,0.22643733024597168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,2,1,128,0,1,fp8,fp8,0,0.20409067471822104
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,float16,0,0.17457065979639688
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,fp8,0,0.17528533935546875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,2,128,0,1,fp8,fp8,0,0.161189337571462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,float16,0,0.17499200503031412
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,fp8,0,0.4498026768366496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,fp8,0,0.17541333039601645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,2,1,128,0,1,fp8,fp8,0,0.16166399916013083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,float16,0,0.45289067427317303
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,10240,2,1,128,0,1,fp8,fp8,0,0.40757866700490314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,fp8,0,0.2708746592203776
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,float16,0,0.2759786645571391
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,float16,0,0.16901866594950357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,fp8,0,0.27804799874623615
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,128,0,1,fp8,fp8,0,0.15544533729553223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,2,128,0,1,fp8,fp8,0,0.2566346724828084
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,float16,0,0.26898133754730225
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,2,1,128,0,1,fp8,fp8,0,0.24633600314458212
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,fp8,0,0.1718026598294576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,float16,0,0.16904000441233316
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,fp8,0,0.16869332393010458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,2,1,128,0,1,fp8,fp8,0,0.15435199936230978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,float16,0,0.1488800048828125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,fp8,0,0.14805866281191507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,2,128,0,1,fp8,fp8,0,0.13805866241455078
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,float16,0,0.14669866363207498
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,fp8,0,0.14819733301798502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,2,1,128,0,1,fp8,fp8,0,0.13773333032925925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,float16,0,0.32892799377441406
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,float16,0,0.5694400072097778
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,fp8,0,0.5696800152460734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,8192,2,1,128,0,1,fp8,fp8,0,0.5072853167851766
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,fp8,0,0.3290826678276062
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,2,128,0,1,fp8,fp8,0,0.29966400067011517
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,float16,0,0.3178880016009013
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,fp8,0,0.31787200768788654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,2,1,128,0,1,fp8,fp8,0,0.29051733016967773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,float16,0,0.20228266716003418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,fp8,0,0.20535467068354288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,2,128,0,1,fp8,fp8,0,0.18714666366577148
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,128,0,1,fp8,fp8,0,0.11958400408426921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,float16,0,0.19613866011301676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,fp8,0,0.19951466719309488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,2,1,128,0,1,fp8,fp8,0,0.1809813380241394
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,float16,0,0.12918933232625326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,fp8,0,0.1297813355922699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,float16,0,0.12981866796811423
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,fp8,0,0.12788266936937967
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,2,1,128,0,1,fp8,fp8,0,0.11860266327857971
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,float16,0,0.12171199917793274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,fp8,0,0.12338133653004964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,2,128,0,1,fp8,fp8,0,0.1134986678759257
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,float16,0,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,fp8,0,0.12175466616948445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,2,1,128,0,1,fp8,fp8,0,0.11347200473149617
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,float16,0,0.21516267458597818
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,float16,0,0.36246931552886963
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,128,0,1,fp8,fp8,0,0.1993173360824585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,fp8,0,0.3631306489308675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,6144,2,1,128,0,1,fp8,fp8,0,0.33001599709192914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,fp8,0,0.21571733554204306
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,fp8,0,0.20779200394948324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,float16,0,0.2060640056927999
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,2,1,128,0,1,fp8,fp8,0,0.19020267327626547
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,float16,0,0.13362133502960205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,128,0,1,fp8,fp8,0,0.12382933497428894
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,fp8,0,0.13589333494504294
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,float16,0,0.13182399670283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,fp8,0,0.13531733552614847
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,2,1,128,0,1,fp8,fp8,0,0.12171199917793274
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,float16,0,0.09981333216031392
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,fp8,0,0.10084266463915507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,2,128,0,1,fp8,fp8,0,0.09478400150934856
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,float16,0,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,fp8,0,0.10048000017801921
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,2,1,128,0,1,fp8,fp8,0,0.0929813285668691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,float16,0,0.09609599908192952
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,fp8,0,0.09567466378211975
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,2,128,0,1,fp8,fp8,0,0.08900266885757446
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,float16,0,0.094842662413915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,float16,0,0.3724000056584676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,fp8,0,0.09487467010815938
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,2,1,128,0,1,fp8,fp8,0,0.08814932902654012
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,fp8,0,0.37172265847524005
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,float16,0,0.20524799823760986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,float16,0,0.21556266148885092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,4096,2,1,128,0,1,fp8,fp8,0,0.3380213181177775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,fp8,0,0.2176426649093628
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,2,128,0,1,fp8,fp8,0,0.19846399625142416
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,fp8,0,0.20429333051045737
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,2,1,128,0,1,fp8,fp8,0,0.18854933977127075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,float16,0,0.12640000383059183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,fp8,0,0.12778133153915405
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,2,128,0,1,fp8,fp8,0,0.12081066767374675
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,float16,0,0.1216373344262441
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,float16,0,0.07799466451009114
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,fp8,0,0.12549333771069845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,2,1,128,0,1,fp8,fp8,0,0.11316800117492676
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,float16,0,0.07250666618347168
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,fp8,0,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,2,128,0,1,fp8,fp8,0,0.07420266668001811
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,2,1,128,0,1,fp8,fp8,0,0.07349333167076111
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,float16,0,0.06845866640408833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,fp8,0,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,2,128,0,1,fp8,fp8,0,0.06755733489990234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,float16,0,0.07241066793600719
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,fp8,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,128,0,1,fp8,fp8,0,0.06399466594060262
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,fp8,0,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,2,1,128,0,1,fp8,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,fp8,0,0.24465066194534302
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,fp8,0,0.06833066542943318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,float16,0,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,2,128,0,1,fp8,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,float16,0,0.06846400101979573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,float16,0,0.24503467480341592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,3072,2,1,128,0,1,fp8,fp8,0,0.22483734289805093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,fp8,0,0.14814933141072592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,2,128,0,1,fp8,fp8,0,0.13743467132250467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,float16,0,0.13662399848302206
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,fp8,0,0.13658666610717773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,2,1,128,0,1,fp8,fp8,0,0.12852799892425537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,float16,0,0.08820799986521403
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,fp8,0,0.08985066413879395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,2,128,0,1,fp8,fp8,0,0.08237333099047343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,128,0,1,fp8,fp8,0,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,float16,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,fp8,0,0.08733866612116496
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,2,1,128,0,1,fp8,fp8,0,0.08078399797280629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,fp8,0,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,float16,0,0.0621066689491272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,fp8,0,0.06345599889755249
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,2,1,128,0,1,fp8,fp8,0,0.05909866591294607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,float16,0,0.05771199862162272
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,fp8,0,0.05996799965699514
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,2,128,0,1,fp8,fp8,0,0.054805333415667214
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,float16,0,0.055770665407180786
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,2,1,128,0,1,fp8,fp8,0,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,float16,0,0.05588266750176748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,fp8,0,0.0558240016301473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,2,128,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,fp8,0,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,2,1,128,0,1,fp8,fp8,0,0.05198400219281515
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,float16,0,0.26493332783381146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,fp8,0,0.26789865891138714
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,2048,2,1,128,0,1,fp8,fp8,0,0.24593067169189453
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,float16,0,0.15650666753451029
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,fp8,0,0.1572053333123525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,2,128,0,1,fp8,fp8,0,0.14564266800880432
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,float16,0,0.14670933286348978
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,fp8,0,0.14657599727312723
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,2,1,128,0,1,fp8,fp8,0,0.13598933815956116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,float16,0,0.08916266759236653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,fp8,0,0.09026133020718892
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,2,128,0,1,fp8,fp8,0,0.08552533388137817
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,fp8,0,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,float16,0,0.08538666367530823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,fp8,0,0.08649599552154541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,2,1,128,0,1,fp8,fp8,0,0.07824000219504039
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,float16,0,0.054832001527150474
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,2,128,0,1,fp8,fp8,0,0.051728000243504844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,float16,0,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,2,1,128,0,1,fp8,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,fp8,0,0.04752000172932943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,128,0,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,float16,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,fp8,0,0.04937600096066793
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,2,128,0,1,fp8,fp8,0,0.04494933287302653
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,float16,0,0.043738668163617454
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,float16,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,2,128,0,1,fp8,fp8,0,0.041322665909926094
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,fp8,0,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,2,1,128,0,1,fp8,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,fp8,0,0.04348266621430715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,2,128,0,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,2,1,128,0,1,fp8,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,float16,0,0.18070934216181436
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,fp8,0,0.18272000551223755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1536,2,1,128,0,1,fp8,fp8,0,0.16740800937016806
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,float16,0,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,fp8,0,0.11121066411336263
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,2,128,0,1,fp8,fp8,0,0.10358933607737224
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,float16,0,0.10214400291442871
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,fp8,0,0.10102933645248413
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,2,1,128,0,1,fp8,fp8,0,0.09539199868837993
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,float16,0,0.06392533580462138
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,fp8,0,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,2,128,0,1,fp8,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,128,0,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,float16,0,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,fp8,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,2,1,128,0,1,fp8,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,128,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,float16,0,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,fp8,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,float16,0,0.04513599971930186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,2,1,128,0,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,float16,0,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,float16,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,2,1,128,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,float16,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,fp8,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,2,128,0,1,fp8,fp8,0,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,128,0,1,fp8,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,fp8,0,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,2,1,128,0,1,fp8,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,float16,0,0.1844373345375061
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,fp8,0,0.035904000202814736
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,float16,0,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,2,1,128,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,128,0,1,fp8,fp8,0,0.1076693336168925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,fp8,0,0.18348799149195352
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,1024,2,1,128,0,1,fp8,fp8,0,0.18370133638381958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,float16,0,0.11173866192499797
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,fp8,0,0.10730666915575664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,float16,0,0.10249066352844238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,fp8,0,0.10083199540774028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,2,1,128,0,1,fp8,fp8,0,0.1013866662979126
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,128,0,1,fp8,fp8,0,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,float16,0,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,fp8,0,0.06297066807746887
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,2,128,0,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,float16,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,fp8,0,0.058864002426465355
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,2,128,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,float16,0,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,fp8,0,0.032799998919169106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,128,0,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,fp8,0,0.038480001191298165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,float16,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,2,128,0,1,fp8,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,float16,0,0.032645332316557564
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,2,128,0,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,float16,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,fp8,0,0.02991466720898946
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,2,1,128,0,1,fp8,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,2,128,0,1,fp8,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,128,0,1,fp8,fp8,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,float16,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,fp8,0,0.02886933336655299
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,2,1,128,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,float16,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,2,1,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,float16,0,0.159578671058019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,fp8,0,0.1574666698773702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,float16,0,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,512,2,1,128,0,1,fp8,fp8,0,0.16103466351826987
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,fp8,0,0.0939626693725586
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,2,128,0,1,fp8,fp8,0,0.09300800164540608
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,float16,0,0.08608532945315044
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,fp8,0,0.08475733796755473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,2,1,128,0,1,fp8,fp8,0,0.08820266524950664
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,float16,0,0.052602668603261314
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,2,128,0,1,fp8,fp8,0,0.053114667534828186
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,128,0,1,fp8,fp8,0,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,float16,0,0.050026665131251015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,2,1,128,0,1,fp8,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,float16,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,float16,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,2,1,128,0,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,2,128,0,1,fp8,fp8,0,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,float16,0,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,fp8,0,0.026799999177455902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,2,1,128,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,2,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,float16,0,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,2,1,128,0,1,fp8,fp8,0,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,2,128,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,fp8,0,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,2,1,128,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,2,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,2,1,128,0,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,2,128,0,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,fp8,0,0.07626666625340779
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,fp8,0,0.044437333941459656
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,2,1,128,0,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,float16,0,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,float16,0,0.04518933097521464
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,256,2,1,128,0,1,fp8,fp8,0,0.08200533191363017
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,2,128,0,1,fp8,fp8,0,0.04877333343029022
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,float16,0,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,2,1,128,0,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,2,128,0,1,fp8,fp8,0,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,float16,0,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,fp8,0,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,2,1,128,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,2,128,0,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,2,1,128,0,1,fp8,fp8,0,0.024485332270463307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,2,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,2,1,128,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,2,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,2,1,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,2,128,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,2,1,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,2,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,2,1,128,0,1,fp8,fp8,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,2,128,0,1,fp8,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,128,0,1,fp8,fp8,0,0.0454720010360082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,2,1,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,float16,0,0.04770666857560476
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,fp8,0,0.04658666749795278
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,float16,0,0.031157332162062328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,2,128,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,128,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,float16,0,0.030042665700117748
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,2,1,128,0,1,fp8,fp8,0,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,float16,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,2,1,128,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,2,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,2,1,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,2,128,0,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,2,1,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,2,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,2,1,128,0,1,fp8,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,2,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,2,128,0,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,2,1,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,2,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,float16,0,0.036373332142829895
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,64,2,1,128,0,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,2,128,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,float16,0,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,fp8,0,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,2,1,128,0,1,fp8,fp8,0,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,2,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,2,1,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,2,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,2,1,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,2,1,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,2,128,0,1,fp8,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,2,1,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,2,128,0,1,fp8,fp8,0,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,2,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,2,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,2,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,2,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,2,1,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,128,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,fp8,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,32,2,1,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,2,1,128,0,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,2,1,128,0,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,128,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,2,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,2,1,128,0,1,fp8,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,2,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,2,1,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,2,1,128,0,1,fp8,fp8,0,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,2,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,2,1,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,2,128,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,2,1,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,float16,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,256,16,2,1,128,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,2,128,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,2,1,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,float16,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,float16,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,2,128,0,1,fp8,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,2,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,2,128,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,2,1,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,2,128,0,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,2,128,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,fp8,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,2,1,128,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,float16,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,2,1,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,2,1,128,0,1,fp8,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,float16,0,0.22380266586939493
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,float16,0,0.3439360062281291
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,fp8,0,0.3473546504974365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16384,1,1,128,0,1,fp8,fp8,0,0.3128160039583842
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,fp8,0,0.22576000293095908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16384,1,1,128,0,1,fp8,fp8,0,0.2059733271598816
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,float16,0,0.21567465861638388
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,128,0,1,fp8,fp8,0,0.19955732425053915
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,fp8,0,0.2178773283958435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,float16,0,0.22392000754674277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,fp8,0,0.22470400730768839
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,12288,1,1,128,0,1,fp8,fp8,0,0.20422399044036865
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,float16,0,0.1724053422609965
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,fp8,0,0.1726026733716329
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,float16,0,0.16844799121220908
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,12288,1,1,128,0,1,fp8,fp8,0,0.15826132893562317
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,float16,0,0.16659733653068542
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,128,0,1,fp8,fp8,0,0.15402133266131082
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,fp8,0,0.16668800512949625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,fp8,0,0.16806934277216592
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,float16,0,0.14009599884351095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,fp8,0,0.1418560047944387
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,10240,1,1,128,0,1,fp8,fp8,0,0.1541706621646881
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,float16,0,0.14563733339309692
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,fp8,0,0.14621866742769876
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,10240,1,1,128,0,1,fp8,fp8,0,0.1344373325506846
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,10240,1,1,128,0,1,fp8,fp8,0,0.1316159963607788
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,float16,0,0.20167466004689535
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,fp8,0,0.2017866571744283
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,8192,1,1,128,0,1,fp8,fp8,0,0.18874132633209229
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,float16,0,0.12755200266838074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,fp8,0,0.12893866499265036
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,8192,1,1,128,0,1,fp8,fp8,0,0.11734400192896526
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,float16,0,0.11999467015266418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,fp8,0,0.11962667107582092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,8192,1,1,128,0,1,fp8,fp8,0,0.11141866445541382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,fp8,0,0.13598933815956116
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,float16,0,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,fp8,0,0.11931199828783672
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,8192,1,1,128,0,1,fp8,fp8,0,0.10731732845306396
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,float16,0,0.09269332885742188
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,float16,0,0.13436800241470337
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,6144,1,1,128,0,1,fp8,fp8,0,0.12443199753761292
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,float16,0,0.09144533673922221
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,float16,0,0.09912000099817912
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,fp8,0,0.09850133458773296
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,6144,1,1,128,0,1,fp8,fp8,0,0.09089066584904988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,fp8,0,0.09737066427866618
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,6144,1,1,128,0,1,fp8,fp8,0,0.08745599786440532
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,float16,0,0.07870933413505554
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,fp8,0,0.09070400396982829
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,6144,1,1,128,0,1,fp8,fp8,0,0.08612266182899475
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,float16,0,0.12851732969284058
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,fp8,0,0.12796800335248312
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,4096,1,1,128,0,1,fp8,fp8,0,0.11931733290354411
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,fp8,0,0.08145066599051158
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,4096,1,1,128,0,1,fp8,fp8,0,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,float16,0,0.07217599948247273
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,fp8,0,0.07235200206438701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,4096,1,1,128,0,1,fp8,fp8,0,0.06683200101057689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,128,0,1,fp8,fp8,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,float16,0,0.06806399921576183
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,fp8,0,0.068122665087382
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,4096,1,1,128,0,1,fp8,fp8,0,0.06403199831644694
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,float16,0,0.06743999818960826
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,fp8,0,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,float16,0,0.08860799670219421
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,3072,1,1,128,0,1,fp8,fp8,0,0.08310399949550629
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,float16,0,0.0636106679836909
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,3072,1,1,128,0,1,fp8,fp8,0,0.05886933207511902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,128,0,1,fp8,fp8,0,0.05041599770387014
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,float16,0,0.057114665706952415
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,3072,1,1,128,0,1,fp8,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,float16,0,0.055045331517855324
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,float16,0,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,float16,0,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,3072,1,1,128,0,1,fp8,fp8,0,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,float16,0,0.08771199981371562
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,fp8,0,0.09065066774686177
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,2048,1,1,128,0,1,fp8,fp8,0,0.08452799916267395
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,2048,1,1,128,0,1,fp8,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,fp8,0,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,2048,1,1,128,0,1,fp8,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,float16,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,2048,1,1,128,0,1,fp8,fp8,0,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,2048,1,1,128,0,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,float16,0,0.040261333187421165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,fp8,0,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,2048,1,1,128,0,1,fp8,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,float16,0,0.06424533327420552
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1536,1,1,128,0,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,float16,0,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,float16,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,fp8,0,0.04515199859937032
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1536,1,1,128,0,1,fp8,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,float16,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1536,1,1,128,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1536,1,1,128,0,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,float16,0,0.0351200004418691
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1536,1,1,128,0,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,fp8,0,0.04074666649103165
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1536,1,1,128,0,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,float16,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,fp8,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,1024,1,1,128,0,1,fp8,fp8,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,float16,0,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,1024,1,1,128,0,1,fp8,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,128,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,float16,0,0.03175999969244003
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,1024,1,1,128,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,float16,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,1024,1,1,128,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,float16,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,1024,1,1,128,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,float16,0,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,fp8,0,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,1024,1,1,128,0,1,fp8,fp8,0,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,float16,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,fp8,0,0.05078400174776713
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,128,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,512,1,1,128,0,1,fp8,fp8,0,0.05533866584300995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,float16,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,512,1,1,128,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,float16,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,512,1,1,128,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,float16,0,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,512,1,1,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,512,1,1,128,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,512,1,1,128,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,float16,0,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,512,1,1,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,256,1,1,128,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,256,1,1,128,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,256,1,1,128,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,256,1,1,128,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,256,1,1,128,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,256,1,1,128,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,256,1,1,128,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,float16,0,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,128,1,1,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,128,1,1,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,128,1,1,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,128,1,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,128,1,1,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,128,1,1,128,0,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,128,1,1,128,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,64,1,1,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,64,1,1,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,64,1,1,128,0,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,64,1,1,128,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,64,1,1,128,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,64,1,1,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,64,1,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,32,1,1,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,32,1,1,128,0,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,32,1,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,32,1,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,32,1,1,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,32,1,1,128,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,float16,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,32,1,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,32,1,1,128,0,1,fp8,fp8,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,float16,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,128,16,1,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,64,16,1,1,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,128,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,32,16,1,1,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,8,16,1,1,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,4,16,1,1,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,2,16,1,1,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,float16,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,context_attention,torch_flow,1,16,1,1,128,0,1,fp8,fp8,0,0.015103999525308609
